Example #1
    def setUp(self):
        """
        _setUp_

        Set up CouchDB and the test environment
        """
        super(ResubmitBlockTest, self).setUp()

        self.group = 'unknown'
        self.user = '******'

        # Set external test helpers
        self.testInit = TestInitCouchApp(__file__)
        self.testInit.setLogging()
        self.testInit.setupCouch("resubmitblock_t", "ACDC", "GroupUser")

        # Define test environment
        self.couchUrl = os.environ["COUCHURL"]
        self.acdcDBName = 'resubmitblock_t'
        self.validLocations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']
        self.siteWhitelist = ['T2_XX_SiteA']
        siteDB = SiteDB()
        # Convert PhEDEx node names to valid processing site names
        self.PSNs = siteDB.PNNstoPSNs(self.validLocations)
        self.workflowName = 'dballest_ReReco_workflow'
        couchServer = CouchServer(dburl=self.couchUrl)
        self.acdcDB = couchServer.connectDatabase(self.acdcDBName, create=False)
        user = makeUser(self.group, '*****@*****.**', self.couchUrl, self.acdcDBName)
        user.create()

        return
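
For context, PNNstoPSNs maps PhEDEx node names (PNNs, storage endpoints such as T1_US_FNAL_Disk) to processing site names (PSNs) usable in a site whitelist. A minimal sketch of the expected conversion for the locations above, assuming the usual CMS naming convention; the real mapping is served by SiteDB and may differ:

valid_locations = ['T2_US_Nebraska', 'T1_US_FNAL_Disk', 'T1_UK_RAL_Disk']

def pnn_to_psn(pnn):
    # Strip the storage suffix to recover the processing site name
    # (illustrative assumption; SiteDB holds the authoritative mapping).
    for suffix in ('_Disk', '_MSS', '_Buffer', '_Export'):
        pnn = pnn.replace(suffix, '')
    return pnn

print([pnn_to_psn(node) for node in valid_locations])
# -> ['T2_US_Nebraska', 'T1_US_FNAL', 'T1_UK_RAL']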
Example #2
class ResubmitBlock(StartPolicyInterface):
    """Split elements into blocks"""
    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.args.setdefault('SplittingAlgo', 'LumiBased')
        self.lumiType = "NumberOfLumis"

        # Define how to handle the different splitting algorithms
        self.algoMapping = {
            'Harvest': self.singleChunk,
            'ParentlessMergeBySize': self.singleChunk,
            'MinFileBased': self.singleChunk,
            'LumiBased': self.singleChunk,
            'EventAwareLumiBased': self.singleChunk,
            'EventBased': self.singleChunk
        }
        self.unsupportedAlgos = ['WMBSMergeBySize', 'SiblingProcessingBased']
        self.defaultAlgo = self.fixedSizeChunk
        self.sites = []
        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        # Prepare a site list in case we need it
        siteWhitelist = self.initialTask.siteWhitelist()
        siteBlacklist = self.initialTask.siteBlacklist()
        self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for block in self.validBlocks(self.initialTask):
            parentList = {}
            parentFlag = False
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                parentList[block["Name"]] = block['Sites']

            self.newQueueElement(
                Inputs={block['Name']: block['Sites']},
                ParentFlag=parentFlag,
                ParentData=parentList,
                NumberOfLumis=block[self.lumiType],
                NumberOfFiles=block['NumberOfFiles'],
                NumberOfEvents=block['NumberOfEvents'],
                Jobs=ceil(
                    float(block[self.args['SliceType']]) /
                    float(self.args['SliceSize'])),
                ACDC=block['ACDC'],
                NoInputUpdate=self.initialTask.getTrustSitelists().get(
                    'trustlists'),
                NoPileupUpdate=self.initialTask.getTrustSitelists().get(
                    'trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)

    def validBlocks(self, task):
        """Return blocks that pass the input data restriction according
           to the splitting algorithm"""
        validBlocks = []

        acdcInfo = task.getInputACDC()
        if not acdcInfo:
            raise WorkQueueWMSpecError(
                self.wmspec, 'No acdc section for %s' % task.getPathName())
        acdc = DataCollectionService(acdcInfo["server"], acdcInfo["database"])
        if self.data:
            acdcBlockSplit = ACDCBlock.splitBlockName(self.data.keys()[0])
        else:
            # if self.data is not passed, assume the data is the input
            # dataset from the spec
            acdcBlockSplit = False

        if acdcBlockSplit:
            dbsBlock = {}
            dbsBlock['Name'] = self.data.keys()[0]
            block = acdc.getChunkInfo(acdcInfo['collection'],
                                      acdcBlockSplit['TaskName'],
                                      acdcBlockSplit['Offset'],
                                      acdcBlockSplit['NumOfFiles'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            dbsBlock['ACDC'] = acdcInfo
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            validBlocks.append(dbsBlock)
        else:
            if self.args['SplittingAlgo'] in self.unsupportedAlgos:
                raise WorkQueueWMSpecError(
                    self.wmspec, 'ACDC is not supported for %s' %
                    self.args['SplittingAlgo'])
            splittingFunc = self.defaultAlgo
            if self.args['SplittingAlgo'] in self.algoMapping:
                splittingFunc = self.algoMapping[self.args['SplittingAlgo']]
            validBlocks = splittingFunc(acdc, acdcInfo, task)

        return validBlocks

    def fixedSizeChunk(self, acdc, acdcInfo, task):
        """Return a set of blocks with a fixed number of ACDC records"""
        fixedSizeBlocks = []
        chunkSize = 250
        acdcBlocks = acdc.chunkFileset(acdcInfo['collection'],
                                       acdcInfo['fileset'], chunkSize)
        for block in acdcBlocks:
            dbsBlock = {}
            dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                              acdcInfo["fileset"],
                                              block['offset'], block['files'])
            dbsBlock['NumberOfFiles'] = block['files']
            dbsBlock['NumberOfEvents'] = block['events']
            dbsBlock['NumberOfLumis'] = block['lumis']
            if task.getTrustSitelists().get('trustlists'):
                dbsBlock["Sites"] = self.sites
            else:
                dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(block["locations"])
            dbsBlock['ACDC'] = acdcInfo
            if dbsBlock['NumberOfFiles']:
                fixedSizeBlocks.append(dbsBlock)
        return fixedSizeBlocks

    def singleChunk(self, acdc, acdcInfo, task):
        """Return a single block (inside a list) with all associated ACDC records"""
        result = []
        acdcBlock = acdc.singleChunkFileset(acdcInfo['collection'],
                                            acdcInfo['fileset'])
        dbsBlock = {}
        dbsBlock['Name'] = ACDCBlock.name(self.wmspec.name(),
                                          acdcInfo["fileset"],
                                          acdcBlock['offset'],
                                          acdcBlock['files'])
        dbsBlock['NumberOfFiles'] = acdcBlock['files']
        dbsBlock['NumberOfEvents'] = acdcBlock['events']
        dbsBlock['NumberOfLumis'] = acdcBlock['lumis']
        if task.getTrustSitelists().get('trustlists'):
            dbsBlock["Sites"] = self.sites
        else:
            dbsBlock["Sites"] = self.siteDB.PNNstoPSNs(acdcBlock["locations"])
        dbsBlock['ACDC'] = acdcInfo
        if dbsBlock['NumberOfFiles']:
            result.append(dbsBlock)

        return result
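
The Jobs value passed to newQueueElement above is a plain ceiling division of the chosen slice counter by the slice size. A small worked sketch of that arithmetic (block contents invented for illustration):

from math import ceil

args = {'SliceType': 'NumberOfFiles', 'SliceSize': 1}
block = {'NumberOfFiles': 25, 'NumberOfEvents': 50000, 'NumberOfLumis': 120}

jobs = ceil(float(block[args['SliceType']]) / float(args['SliceSize']))
print(jobs)  # -> 25 jobs, one per file at SliceSize 1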
Example #3
class RequestQuery:

    def __init__(self,config):
        self.br=Browser()

        self.config = config
        
        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.phedex = PhEDEx({"endpoint":"https://cmsweb.cern.ch/phedex/datasvc/json/prod/"}, "json")
        self.dbsPhys01 = DbsApi(url = dbs_base_url+"phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url = dbs_base_url+"phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url = dbs_base_url+"phys03/DBSReader/")
        
    def __del__(self):
        self.br.close()

    def login2Savannah(self):
        """
        Log into Savannah with the credentials given in the config (username and password).
        The user must have admin privileges for store results requests.
        """
        login_page='https://savannah.cern.ch/account/login.php?uri=%2F'
        savannah_page='https://savannah.cern.ch/task/?group=cms-storeresults'
        
        self.br.open(login_page)

        ## 'Search' form is form 0
        ## login form is form 1
        self.br.select_form(nr=1)

        username = self.config["SavannahUser"]
    
        self.br['form_loginname']=username
        self.br['form_pw']=self.config["SavannahPasswd"]
        
        self.br.submit()
        
        response = self.br.open(savannah_page)
        
        # Check to see if login was successful
        if not re.search('Logged in as ' + username, response.read()):
            print('login unsuccessful, please check your username and password')
            return False
        else:
            return True
    
    def selectQueryForm(self,**kargs):       
        """
        Create the browser view to get all the store results tickets from Savannah
        """
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")

            ## Use right query form labelled Test
            control = self.br.find_control("report_id",type="select")

            for item in control.items:
                if item.attrs['label'] == "Test":
                    control.value = [item.attrs['value']]
                    
            ##select number of entries displayed per page
            control = self.br.find_control("chunksz",type="text")
            control.value = "150"

            ##check additional searching parameter
            for arg in kargs:
                if arg == "approval_status":
                    control = self.br.find_control("resolution_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

                elif arg == "task_status":
                    control = self.br.find_control("status_id",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]
                            
                elif arg == "team":
                    control = self.br.find_control("custom_sb5",type="select")
                    for item in control.items:
                        if item.attrs['label'] == kargs[arg].strip():
                            control.value = [item.attrs['value']]

            response = self.br.submit()
            response.read()

        return

    def getScramArchByCMSSW(self):
        """
        Query the list of available CMSSW releases and
        return a dictionary of ScramArchs keyed by CMSSW release
        """
        
        # Set up a temporary connection to the server and get the response from cmstags
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response=br.open(url)
        soup = BeautifulSoup(response.read())
        
        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw={}
        
        # Fill the dictionary
        for arch in soup.find_all('architecture'): 
            for cmssw in arch.find_all('project'): 
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel]=[]
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)
        
        return archByCmssw
      
    def createValueDicts(self):       
        """
        Init dictionaries by value/label:
        - Releases by Value
        - Physics group by value
        - DBS url by value
        - DBS url by label
        - Status of savannah request by value 
        - Status of savannah ticket by value (Open/Closed/Any)
        """      
        if self.isLoggedIn:
            self.br.select_form(name="bug_form")
            
            control = self.br.find_control("custom_sb2",type="select")
            self.ReleaseByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb3",type="select")
            self.GroupByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("custom_sb4",type="select")
            self.DBSByValueDict = self.getLabelByValueDict(control)
            self.DBSByLabelDict = self.getValueByLabelDict(control)

            control = self.br.find_control("resolution_id",type="select")
            self.StatusByValueDict = self.getLabelByValueDict(control)

            control = self.br.find_control("status_id",type="select")
            self.TicketStatusByLabelDict = self.getValueByLabelDict(control)

        return
    
    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Return the list of PSNs and the list of PNNs of the block origin sites.
        """
        
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True,dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True,dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)
        
        return psnList, list(pnnList)

    def phEDExNodetocmsName(self, nodeList):
        """
        Convert a list of PhEDEx node names to a list of CMS site names
        """
        names = []
        for node in nodeList:
            name = node.replace('_MSS',
                                '').replace('_Disk',
                                    '').replace('_Buffer',
                                        '').replace('_Export', '')
            if name not in names:
                names.append(name)
        return names
    
    def setGlobalTagFromOrigin(self, dbs_url,input_dataset):
        """
        Get the global tag of the dataset from the source DBS url. If it is not set, set the global tag to 'UNKNOWN'
        """
        
        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)
        
        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'
            
        return globalTag
    
    def isDataAtUrl(self, dbs_url,input_dataset):
        """
        Return True if the dataset is at the DBS url, otherwise False
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True
         
    def getLabelByValueDict(self, control):
        """
        From control items, create a dictionary by values
        """   
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label
                
        return d
    
    def getValueByLabelDict(self, control):
        """
        From control items, create a dictionary by labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d
    
    def getRequests(self,**kargs):
        """
        Go through all the Savannah requests and create JSON files if the
        ticket is not Closed and the status of the item is Done.
        Also report back a summary of the requests in Savannah.
        """
        requests = []
        
        # Open Browser and login into Savannah
        self.br=Browser()
        self.isLoggedIn = self.login2Savannah()
        
        if self.isLoggedIn:
            if not kargs:
                self.selectQueryForm(approval_status='1',task_status='0')
            else:
                self.selectQueryForm(**kargs)
            self.createValueDicts()
        
            self.br.select_form(name="bug_form")
            response = self.br.submit()

            html_ouput = response.read()
            
            scramArchByCMSSW = self.getScramArchByCMSSW()
            self.nodeMappings = self.phedex.getNodeMap()
            
            for link in self.br.links(text_regex="#[0-9]+"):
                response = self.br.follow_link(link)
                
                try:
                    ## Get Information
                    self.br.select_form(name="item_form")

                    ## remove leading &nbsp and # from task
                    task = link.text.replace('#','').decode('utf-8').strip()
                    print("Processing ticket: %s" % task)
                    
                    ## Get input dataset name
                    control = self.br.find_control("custom_tf1",type="text")
                    input_dataset = control.value
                    input_primary_dataset = input_dataset.split('/')[1].replace(' ','')
                    input_processed_dataset = input_dataset.split('/')[2].replace(' ','')
                    data_tier = input_dataset.split('/')[3].replace(' ','')
                    
                    ## Get DBS URL by Drop Down
                    control = self.br.find_control("custom_sb4",type="select")
                    dbs_url = self.DBSByValueDict[control.value[0]]

                    ## Get DBS URL by text field (for old entries)
                    if dbs_url=='None':
                        control = self.br.find_control("custom_tf4",type="text")
                        dbs_url = control.value.replace(' ','')
                    else: # Transform input value to a valid DBS url
                        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
                        dbs_url = dbs_base_url+dbs_url+"/DBSReader"
                        
                    ## Get Release
                    control = self.br.find_control("custom_sb2",type="select")
                    release_id = control.value
                    
                    ## Get current request status
                    control = self.br.find_control("status_id",type="select")
                    request_status_id = control.value
                    RequestStatusByValueDict = self.getLabelByValueDict(control)
                    
                    # close the request if a deprecated release was used
                    try:
                        release = self.ReleaseByValueDict[release_id[0]]
                    except KeyError:
                        if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid anymore, since the given CMSSW release is deprecated. If your request should be still processed, please reopen the request and update the CMSSW release to a more recent *working* release.\n"
                            msg+= "\n"
                            msg+= "Thanks,\n"
                            msg+= "Your StoreResults team"
                            self.closeRequest(task,msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to CMSSW not valid" % task)
                            continue
                    
                    # close the request if the release has no ScramArch match
                    if release not in scramArchByCMSSW:
                        if len(self.ReleaseByValueDict)>0 and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                            msg = "Your request is not valid, there is no ScramArch match for the given CMSSW release.\n"
                            msg+= "If your request should be still processed, please reopen the request and update the CMSSW release according to: https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML \n"
                            msg+= "\n"
                            msg+= "Thanks,\n"
                            msg+= "Your StoreResults team"
                            self.closeRequest(task,msg)
                            self.br.back()
                            print("I tried to Close ticket %s due to ScramArch mismatch" % task)
                            continue
                    else:
                        scram_arch = scramArchByCMSSW[release][-1]

                    # close the request if the dataset is not at the DBS url
                    try:
                        data_at_url = self.isDataAtUrl(dbs_url,input_dataset)
                    except Exception:
                        print('I got an error trying to look for dataset %s at %s, please look at this ticket: %s' %(input_dataset,dbs_url,task))
                        continue
                    if not data_at_url:
                        msg = "Your request is not valid, I could not find the given dataset at %s\n" % dbs_url
                        msg+= "If your request should be still processed, please reopen the request and change DBS url properly \n"
                        msg+= "\n"
                        msg+= "Thanks,\n"
                        msg+= "Your StoreResults team"
                        self.closeRequest(task,msg)
                        self.br.back()
                        print("I tried to Close ticket %s, dataset is not at DBS url" % task)
                        continue
                        
                    # Skip tickets that are not approved
                    #if not RequestStatusByValueDict[request_status_id[0]] == "Done":
                    #    continue

                    ## Get Physics Group
                    control = self.br.find_control("custom_sb3",type="select")
                    group_id = control.value[0]
                    group_squad = 'cms-storeresults-'+self.GroupByValueDict[group_id].replace("-","_").lower()

                    ## Get Dataset Version
                    control = self.br.find_control("custom_tf3",type="text")
                    dataset_version = control.value.replace(' ','')
                    if dataset_version == "": dataset_version = '1'
                                        
                    ## Get current status
                    control = self.br.find_control("resolution_id",type="select")
                    status_id = control.value

                    ## Get assigned to
                    control = self.br.find_control("assigned_to",type="select")
                    AssignedToByValueDict = self.getLabelByValueDict(control)
                    assignedTo_id = control.value

                    ##Assign task to the physics group squad
                    if AssignedToByValueDict[assignedTo_id[0]]!=group_squad:
                        assignedTo_id = [self.getValueByLabelDict(control)[group_squad]]
                        control.value = assignedTo_id
                        self.br.submit()

                    # Set default Acquisition Era for StoreResults
                    acquisitionEra = "StoreResults"

                    ## Construction of the new dataset name (ProcessingString)
                    ## remove leading hypernews or physics group name and StoreResults+Version
                    if input_processed_dataset.find(self.GroupByValueDict[group_id])==0:
                        new_dataset = input_processed_dataset.replace(self.GroupByValueDict[group_id],"",1)
                    else:
                        stripped_dataset = input_processed_dataset.split("-")[1:]
                        new_dataset = '_'.join(stripped_dataset)
                    
                except Exception as ex:
                    self.br.back()
                    print("There is a problem with this ticket %s, please have a look to the error:" % task)
                    print(str(ex))
                    print(traceback.format_exc())
                    continue
                
                self.br.back()
                
                # Get dataset site info:
                psnList, pnnList = self.getDatasetOriginSites(dbs_url,input_dataset)
                
                infoDict = {}
                # Build store results json
                # First add all the defaults values
                infoDict["RequestType"] = "StoreResults"
                infoDict["UnmergedLFNBase"] = "/store/unmerged" 
                infoDict["MergedLFNBase"] = "/store/results/" + self.GroupByValueDict[group_id].replace("-","_").lower()
                infoDict["MinMergeSize"] = 1500000000
                infoDict["MaxMergeSize"] = 5000000000
                infoDict["MaxMergeEvents"] = 100000
                infoDict["TimePerEvent"] = 40
                infoDict["SizePerEvent"] = 512.0
                infoDict["Memory"] = 2394
                infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"                                        
                infoDict["Group"] = "DATAOPS"
                infoDict["DbsUrl"] = dbs_url
                
                # Add all the information pulled from Savannah
                infoDict["AcquisitionEra"] = acquisitionEra
                infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(dbs_url,input_dataset)
                infoDict["DataTier"] = data_tier
                infoDict["InputDataset"] = input_dataset
                infoDict["ProcessingString"] = new_dataset
                infoDict["CMSSWVersion"] = release
                infoDict["ScramArch"] = scram_arch
                infoDict["ProcessingVersion"] = dataset_version                    
                infoDict["SiteWhitelist"] = psnList
                
                # Create report for Migration2Global
                report = {}
                 
                #Fill json file, if status is done
                if self.StatusByValueDict[status_id[0]]=='Done' and RequestStatusByValueDict[request_status_id[0]] != "Closed":
                    self.writeJSONFile(task, infoDict)
                    report["json"] = 'y'
                else:
                    report["json"] = 'n'
                    
                report["task"] = int(task)
                report["InputDataset"] = input_dataset
                report["ProcessingString"] = new_dataset
                report["ticketStatus"] = self.StatusByValueDict[status_id[0]]
                report["assignedTo"] = AssignedToByValueDict[assignedTo_id[0]]
                report["localUrl"] = dbs_url
                report["sites"] = psnList
                report["pnns"] = pnnList

                # if the request is closed, change the item status in the report to Closed
                if report["ticketStatus"] == "Done" and RequestStatusByValueDict[request_status_id[0]] == "Closed":
                    report["ticketStatus"] = "Closed"

                requests.append(report)
                    
            # Print out report
            self.printReport(requests)
        # Close connections
        self.br.close()
        
        return requests

    def closeRequest(self,task,msg):
        """
        Close a specific Savannah ticket and
        insert a message in the ticket
        """
        if self.isLoggedIn:
            #self.createValueDicts()
            
            response = self.br.open('https://savannah.cern.ch/task/?'+str(task))

            html = response.read()

            self.br.select_form(name="item_form")

            control = self.br.find_control("status_id",type="select")
            control.value = [self.TicketStatusByLabelDict["Closed"]]

            #Put reason to the comment field
            control = self.br.find_control("comment",type="textarea")
            control.value = msg
                        
            #DBS Drop Down is a mandatory field, if set to None (for old requests), it is not possible to close the request
            self.setDBSDropDown()
                        
            self.br.submit()

            #remove JSON ticket
            self.removeJSONFile(task)
            
            self.br.back()
        return

    def setDBSDropDown(self):
        ## Get DBS URL by Drop Down
        control = self.br.find_control("custom_sb4",type="select")
        dbs_url = self.DBSByValueDict[control.value[0]]

        ## Get DBS URL by text field (for old entries)
        if dbs_url=='None':
            tmp = self.br.find_control("custom_tf4",type="text")
            dbs_url = tmp.value.replace(' ','')

            if dbs_url.find("phys01")!=-1:
                control.value = [self.DBSByLabelDict["phys01"]]
            elif dbs_url.find("phys02")!=-1:
                control.value = [self.DBSByLabelDict["phys02"]]
            elif dbs_url.find("phys03")!=-1:
                control.value = [self.DBSByLabelDict["phys03"]]
            else:
                msg = 'DBS URL of the old request is neither phys01, phys02 nor phys03. Please, check!'
                print(msg)
                raise RuntimeError(msg)

        return

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ##check if file already exists
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'
        if not os.access(filename,os.F_OK):
            jsonfile = open(filename,'w')
            request = {'createRequest':infoDict} ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request,sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self,task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"]+'/Ticket_'+str(task)+'.json'

        if os.access(filename,os.F_OK):
            os.remove(filename)

        return

    def printReport(self, requests):
        """
        Print out a report
        """
        print("%20s %10s %5s %35s %10s %50s %50s" %( 'Savannah Ticket','Status','json','Assigned to','local DBS','Sites','pnns')) 
        print("%20s %10s %5s %35s %10s %50s %50s" %( '-'*20,'-'*10,'-'*5,'-'*35,'-'*10,'-'*50,'-'*50 ))
        
        for report in requests:
            
            json = report["json"]
            ticket = report["task"]
            status = report["ticketStatus"]
            assigned = report["assignedTo"]
            localUrl = report["localUrl"].split('/')[5]
            site = ', '.join(report["sites"])
            pnns = ', '.join(report["pnns"])
            print("%20s %10s %5s %35s %10s %50s %50s" %(ticket,status,json,assigned,localUrl,site,pnns))  
Example #4
class Block(StartPolicyInterface):
    """Split elements into blocks"""
    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"

        # Initialize a list of sites where the data is
        self.sites = []

        # Initialize modifiers of the policy
        self.blockBlackListModifier = []

        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        dbs = self.dbs()
        for block in self.validBlocks(self.initialTask, dbs):
            # set the parent flag for processing only for clarity on the couch doc
            parentList = {}
            parentFlag = False
            # TODO: this is a slow process, it needs to change in DBS3
            if self.initialTask.parentProcessingFlag():
                parentFlag = True
                for dbsBlock in dbs.listBlockParents(block["block"]):
                    if self.initialTask.getTrustSitelists().get('trustlists'):
                        parentList[dbsBlock["Name"]] = self.sites
                    else:
                        parentList[dbsBlock["Name"]] = self.siteDB.PNNstoPSNs(
                            dbsBlock['PhEDExNodeList'])

            self.newQueueElement(
                Inputs={block['block']: self.data.get(block['block'], [])},
                ParentFlag=parentFlag,
                ParentData=parentList,
                NumberOfLumis=int(block[self.lumiType]),
                NumberOfFiles=int(block['NumberOfFiles']),
                NumberOfEvents=int(block['NumberOfEvents']),
                Jobs=ceil(
                    float(block[self.args['SliceType']]) /
                    float(self.args['SliceSize'])),
                OpenForNewData=True
                if str(block.get('OpenForWriting')) == '1' else False,
                NoInputUpdate=self.initialTask.getTrustSitelists().get(
                    'trustlists'),
                NoPileupUpdate=self.initialTask.getTrustSitelists().get(
                    'trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)

        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        validBlocks = []

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        if task.getLumiMask(
        ):  # if we have a lumi mask get only the relevant blocks
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        blocks = []
        # Take data from the inputs or from the spec
        if not self.data:
            if blockWhiteList:
                self.data = dict((block, []) for block in blockWhiteList)
            else:
                self.data = {
                    datasetPath: []
                }  # same structure as in WorkQueueElement

        for data in self.data:
            if data.find('#') > -1:
                Lexicon.block(data)  # check block name
                datasetPath = str(data.split('#')[0])
                blocks.append(str(data))
            else:
                Lexicon.dataset(data)  # check dataset name
                for block in dbs.listFileBlocks(data, onlyClosedBlocks=True):
                    blocks.append(str(block))

        for blockName in blocks:
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue
            if blockName in self.blockBlackListModifier:
                # Don't duplicate blocks rejected before or blocks that were included and therefore are now in the blacklist
                continue
            if task.getLumiMask() and blockName not in maskedBlocks:
                self.rejectedWork.append(blockName)
                continue

            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)
            # blocks with 0 valid files should be ignored
            # - ideally they would be deleted but dbs can't delete blocks
            if not block['NumberOfFiles'] or block['NumberOfFiles'] == '0':
                self.rejectedWork.append(blockName)
                continue

            # check lumi restrictions
            if task.getLumiMask():
                accepted_lumis = sum([
                    len(maskedBlocks[blockName][lfn].getLumis())
                    for lfn in maskedBlocks[blockName]
                ])
                # use the information given by getMaskedBlocks to compute the size of the block
                block['NumberOfFiles'] = len(maskedBlocks[blockName])
                # ratio =  lumis which are ok in the block / total num lumis
                ratioAccepted = 1. * accepted_lumis / float(
                    block['NumberOfLumis'])
                block['NumberOfEvents'] = float(
                    block['NumberOfEvents']) * ratioAccepted
                block[self.lumiType] = accepted_lumis
            # check run restrictions
            elif runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    self.rejectedWork.append(blockName)
                    continue

                if len(runs) == len(runLumis):
                    # If there is no change in the runs, then we can skip recalculating lumi counts
                    recalculateLumiCounts = False

                if recalculateLumiCounts:
                    # Recalculate effective size of block
                    # We pull out file info, since we don't do this often
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(
                        fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                                acceptedLumiCount += len(
                                    lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            if len(fileEntry['LumiList']
                                   ) != acceptedFileLumiCount:
                                acceptedEventCount += float(acceptedFileLumiCount) * fileEntry['NumberOfEvents'] \
                                                      / len(fileEntry['LumiList'])
                            else:
                                acceptedEventCount += fileEntry[
                                    'NumberOfEvents']
                    block[self.lumiType] = acceptedLumiCount
                    block['NumberOfFiles'] = acceptedFileCount
                    block['NumberOfEvents'] = acceptedEventCount
            # save locations
            if task.getTrustSitelists().get('trustlists'):
                self.data[block['block']] = self.sites
            else:
                self.data[block['block']] = self.siteDB.PNNstoPSNs(
                    dbs.listFileBlockLocation(block['block']))

            # TODO: need to decide what to do when no location is found.
            # It could be a network problem (no connection to DBS or PhEDEx)
            # or the DBS SE is not recorded (this will be retried anyway by the location mapper)
            if not self.data[block['block']]:
                self.data[block['block']] = ["NoInitialSite"]
            # # No sites for this block, move it to rejected
            #    self.rejectedWork.append(blockName)
            #    continue

            validBlocks.append(block)
        return validBlocks

    def getMaskedBlocks(self, task, dbs, datasetPath):
        """ Get the blocks which pass the lumi mask restrictions. For each block return the list of lumis
            which were ok (given the lumi mask). The data structure returned is the following:

            {
                "block1" : {"file1" : LumiList(), "file5" : LumiList(), ...}
                "block2" : {"file2" : LumiList(), "file7" : LumiList(), ...}
            }

        """
        # Get the task mask as a LumiList object to make operations easier
        maskedBlocks = {}
        taskMask = task.getLumiMask()

        # for performance reasons, we first get all the blocknames
        blocks = [
            x['block_name'] for x in dbs.dbs.listBlocks(dataset=datasetPath)
        ]

        for block in blocks:
            fileLumis = dbs.dbs.listFileLumis(block_name=block,
                                              validFileOnly=1)
            for fileLumi in fileLumis:
                lfn = fileLumi['logical_file_name']
                runNumber = str(fileLumi['run_num'])
                lumis = fileLumi['lumi_section_num']
                fileMask = LumiList(runsAndLumis={runNumber: lumis})
                commonMask = taskMask & fileMask
                if commonMask:
                    maskedBlocks.setdefault(block, {})
                    maskedBlocks[block].setdefault(lfn, LumiList())
                    maskedBlocks[block][lfn] += commonMask

        return maskedBlocks

    def modifyPolicyForWorkAddition(self, inboxElement):
        """
            A block blacklist modifier will be created;
            this policy object will then split excluding the blocks in both
            the spec blacklist and the blacklist modifier
        """
        # Get the already processed input blocks from the inbox element
        existingBlocks = inboxElement.get('ProcessedInputs', [])
        self.blockBlackListModifier = existingBlocks
        self.blockBlackListModifier.extend(
            inboxElement.get('RejectedInputs', []))
        return

    def newDataAvailable(self, task, inbound):
        """
            For the block policy, new data is available
            if there is at least one open block.
        """
        self.initialTask = task
        dbs = self.dbs()
        openBlocks = dbs.listOpenFileBlocks(task.getInputDatasetPath())
        if openBlocks:
            return True
        return False

    @staticmethod
    def supportsWorkAddition():
        """
            Block start policy supports continuous addition of work
        """
        return True
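
getMaskedBlocks relies on LumiList set arithmetic: a file is kept only if its run/lumi content intersects the task mask. A minimal sketch of that intersection, assuming WMCore's LumiList (the class used above supports the runsAndLumis constructor, the & operator and getLumis(); the import path and exact output format are assumptions):

from WMCore.DataStructs.LumiList import LumiList

taskMask = LumiList(runsAndLumis={'180250': [1, 2, 3, 4, 5]})
fileMask = LumiList(runsAndLumis={'180250': [4, 5, 6, 7]})

commonMask = taskMask & fileMask
if commonMask:
    # Only lumis 4 and 5 of run 180250 survive, so this file would be kept
    print(commonMask.getLumis())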
Example #5
class Dataset(StartPolicyInterface):
    """Split elements into datasets"""
    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfRuns')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"
        self.sites = []
        if os.getenv("WMAGENT_USE_CRIC", False) or os.getenv(
                "WMCORE_USE_CRIC", False):
            self.cric = CRIC()
        else:
            self.cric = None
            self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        work = set() if self.args['SliceType'] == 'NumberOfRuns' else 0
        numFiles = 0
        numEvents = 0
        numLumis = 0
        datasetPath = self.initialTask.getInputDatasetPath()

        # dataset splitting can't have its data selection overridden
        if self.data and self.data.keys() != [datasetPath]:
            raise RuntimeError("Can't provide different data to split with")

        blocks = self.validBlocks(self.initialTask, self.dbs())
        if not blocks:
            return

        for block in blocks:
            if self.args['SliceType'] == 'NumberOfRuns':
                work = work.union(block[self.args['SliceType']])
            else:
                work += float(block[self.args['SliceType']])
            numLumis += int(block[self.lumiType])
            numFiles += int(block['NumberOfFiles'])
            numEvents += int(block['NumberOfEvents'])

        if self.args['SliceType'] == 'NumberOfRuns':
            numJobs = ceil(len(work) / float(self.args['SliceSize']))
        else:
            numJobs = ceil(float(work) / float(self.args['SliceSize']))

        # parentage
        parentFlag = True if self.initialTask.parentProcessingFlag() else False

        self.newQueueElement(
            Inputs={datasetPath: self.data.get(datasetPath, [])},
            ParentFlag=parentFlag,
            NumberOfLumis=numLumis,
            NumberOfFiles=numFiles,
            NumberOfEvents=numEvents,
            Jobs=numJobs,
            NoInputUpdate=self.initialTask.getTrustSitelists().get(
                'trustlists'),
            NoPileupUpdate=self.initialTask.getTrustSitelists().get(
                'trustPUlists'))

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)
        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()
        lumiMask = task.getLumiMask()
        if lumiMask:
            maskedBlocks = self.getMaskedBlocks(task, dbs, datasetPath)

        for blockName in dbs.listFileBlocks(datasetPath):
            # check block restrictions
            if blockWhiteList and blockName not in blockWhiteList:
                continue
            if blockName in blockBlackList:
                continue

            blockSummary = dbs.getDBSSummaryInfo(block=blockName)
            if int(blockSummary.get('NumberOfFiles', 0)) == 0:
                logging.warning(
                    "Block %s being rejected for lack of valid files to process",
                    blockName)
                self.badWork.append(blockName)
                continue

            if self.args['SliceType'] == 'NumberOfRuns':
                blockSummary['NumberOfRuns'] = dbs.listRuns(block=blockName)

            # check lumi restrictions
            if lumiMask:
                if blockName not in maskedBlocks:
                    logging.warning(
                        "Block %s doesn't pass the lumi mask constraints",
                        blockName)
                    self.rejectedWork.append(blockName)
                    continue

                acceptedLumiCount = sum([
                    len(maskedBlocks[blockName][lfn].getLumis())
                    for lfn in maskedBlocks[blockName]
                ])
                ratioAccepted = 1. * acceptedLumiCount / float(
                    blockSummary['NumberOfLumis'])
                maskedRuns = [
                    maskedBlocks[blockName][lfn].getRuns()
                    for lfn in maskedBlocks[blockName]
                ]
                acceptedRuns = set(lumiMask.getRuns()).intersection(
                    set().union(*maskedRuns))

                blockSummary['NumberOfFiles'] = len(maskedBlocks[blockName])
                blockSummary['NumberOfEvents'] = float(
                    blockSummary['NumberOfEvents']) * ratioAccepted
                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfRuns'] = acceptedRuns
            # check run restrictions
            elif runWhiteList or runBlackList:
                runs = set(dbs.listRuns(block=blockName))
                # multi-run blocks need special accounting, which requires more DBS calls
                recalculateLumiCounts = True if len(runs) > 1 else False

                # apply blacklist and whitelist
                runs = runs.difference(runBlackList)
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    logging.warning(
                        "Block %s doesn't pass the runs constraints",
                        blockName)
                    self.rejectedWork.append(blockName)
                    continue

                if recalculateLumiCounts:
                    # Recalculate the number of files, lumis and ~events accepted
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=blockName)

                    for fileEntry in fileInfo:
                        acceptedFile = False
                        for lumiInfo in fileEntry['LumiList']:
                            if lumiInfo['RunNumber'] in runs:
                                acceptedFile = True
                                acceptedLumiCount += len(
                                    lumiInfo['LumiSectionNumber'])
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedEventCount += fileEntry['NumberOfEvents']

                else:
                    acceptedLumiCount = blockSummary["NumberOfLumis"]
                    acceptedFileCount = blockSummary['NumberOfFiles']
                    acceptedEventCount = blockSummary['NumberOfEvents']

                blockSummary[self.lumiType] = acceptedLumiCount
                blockSummary['NumberOfFiles'] = acceptedFileCount
                blockSummary['NumberOfEvents'] = acceptedEventCount
                blockSummary['NumberOfRuns'] = runs

            validBlocks.append(blockSummary)

            if locations is None:
                locations = set(dbs.listFileBlockLocation(blockName))
            else:
                locations = locations.intersection(
                    dbs.listFileBlockLocation(blockName))

        # all needed blocks present at these sites
        if task.getTrustSitelists().get('trustlists'):
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)
            self.data[datasetPath] = self.sites
        elif locations:
            if self.cric:
                self.data[datasetPath] = list(
                    set(self.cric.PNNstoPSNs(locations)))
            else:
                self.data[datasetPath] = list(
                    set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
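
When SliceType is 'NumberOfRuns', the work accumulator in split() above is a set of run numbers and the job count is a ceiling over the number of unique runs. A worked sketch with invented blocks:

from math import ceil

slice_size = 1
blocks = [{'NumberOfRuns': {194050, 194051}},
          {'NumberOfRuns': {194051, 194075}}]

work = set()
for block in blocks:
    work = work.union(block['NumberOfRuns'])

num_jobs = ceil(len(work) / float(slice_size))
print(num_jobs)  # 3 unique runs -> 3 jobs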
Example #6
class Dataset(StartPolicyInterface):
    """Split elements into datasets"""
    def __init__(self, **args):
        StartPolicyInterface.__init__(self, **args)
        self.args.setdefault('SliceType', 'NumberOfFiles')
        self.args.setdefault('SliceSize', 1)
        self.lumiType = "NumberOfLumis"
        self.sites = []
        self.siteDB = SiteDB()

    def split(self):
        """Apply policy to spec"""
        dbs = self.dbs()
        work = 0
        numFiles = 0
        numEvents = 0
        numLumis = 0
        inputDataset = self.initialTask.inputDataset()
        datasetPath = "/%s/%s/%s" % (inputDataset.primary,
                                     inputDataset.processed, inputDataset.tier)
        # dataset splitting can't have its data selection overridden
        if self.data and self.data.keys() != [datasetPath]:
            raise RuntimeError("Can't provide different data to split with")

        blocks = self.validBlocks(self.initialTask, self.dbs())
        if not blocks:
            return

        for block in blocks:
            work += float(block[self.args['SliceType']])
            numLumis += int(block[self.lumiType])
            numFiles += int(block['NumberOfFiles'])
            numEvents += int(block['NumberOfEvents'])

        dataset = dbs.getDBSSummaryInfo(dataset=datasetPath)

        # If a dataset which is not in DBS is passed, just return.
        # The exception will be raised at the upper level
        # when no work is created
        if not dataset:
            return

        # parentage
        parentFlag = True if self.initialTask.parentProcessingFlag() else False

        if not work:
            work = dataset[self.args['SliceType']]

        self.newQueueElement(
            Inputs={dataset['path']: self.data.get(dataset['path'], [])},
            ParentFlag=parentFlag,
            NumberOfLumis=numLumis,
            NumberOfFiles=numFiles,
            NumberOfEvents=numEvents,
            Jobs=ceil(float(work) / float(self.args['SliceSize'])),
            NoLocationUpdate=self.initialTask.getTrustSitelists())

    def validate(self):
        """Check args and spec work with block splitting"""
        StartPolicyInterface.validateCommon(self)
        if not self.initialTask.inputDataset():
            raise WorkQueueWMSpecError(self.wmspec, 'No input dataset')

    def validBlocks(self, task, dbs):
        """Return blocks that pass the input data restriction"""
        datasetPath = task.getInputDatasetPath()
        Lexicon.dataset(datasetPath)  # check dataset name
        validBlocks = []
        locations = None

        blockWhiteList = task.inputBlockWhitelist()
        blockBlackList = task.inputBlockBlacklist()
        runWhiteList = task.inputRunWhitelist()
        runBlackList = task.inputRunBlacklist()

        if task.getTrustSitelists():
            siteWhitelist = task.siteWhitelist()
            siteBlacklist = task.siteBlacklist()
            self.sites = makeLocationsList(siteWhitelist, siteBlacklist)

        for blockName in dbs.listFileBlocks(datasetPath):
            block = dbs.getDBSSummaryInfo(datasetPath, block=blockName)

            # check block restrictions
            if blockWhiteList and block['block'] not in blockWhiteList:
                continue
            if block['block'] in blockBlackList:
                continue

            # check run restrictions
            if runWhiteList or runBlackList:
                # listRunLumis returns a dictionary with the lumi sections per run
                runLumis = dbs.listRunLumis(block=block['block'])
                runs = set(runLumis.keys())
                recalculateLumiCounts = False
                if len(runs) > 1:
                    # If more than one run in the block
                    # Then we must calculate the lumi counts after filtering the run list
                    # This has to be done rarely and requires calling DBS file information
                    recalculateLumiCounts = True

                # apply blacklist
                runs = runs.difference(runBlackList)
                # if whitelist only accept listed runs
                if runWhiteList:
                    runs = runs.intersection(runWhiteList)
                # any runs left are ones we will run on, if none ignore block
                if not runs:
                    continue

                if recalculateLumiCounts:
                    # Get the correct lumi count by recalculating the
                    # effective size of the block; we pull per-file info
                    # since this path is taken rarely.
                    acceptedLumiCount = 0
                    acceptedEventCount = 0
                    acceptedFileCount = 0
                    fileInfo = dbs.listFilesInBlock(fileBlockName=block['block'])
                    for fileEntry in fileInfo:
                        acceptedFile = False
                        acceptedFileLumiCount = 0
                        for lumiInfo in fileEntry['LumiList']:
                            runNumber = lumiInfo['RunNumber']
                            if runNumber in runs:
                                acceptedFile = True
                                acceptedFileLumiCount += 1
                        if acceptedFile:
                            acceptedFileCount += 1
                            acceptedLumiCount += acceptedFileLumiCount
                            if len(fileEntry['LumiList']) != acceptedFileLumiCount:
                                # estimate events by the fraction of accepted lumis
                                acceptedEventCount += (float(acceptedFileLumiCount) *
                                                       fileEntry['NumberOfEvents'] /
                                                       len(fileEntry['LumiList']))
                            else:
                                acceptedEventCount += fileEntry['NumberOfEvents']
                else:
                    acceptedLumiCount = block["NumberOfLumis"]
                    acceptedFileCount = block['NumberOfFiles']
                    acceptedEventCount = block['NumberOfEvents']

                # store the effective size of the block: either the exact
                # post-filter counts computed above, or the unfiltered block
                # totals when the block contained a single run

                block[self.lumiType] = acceptedLumiCount
                block['NumberOfFiles'] = acceptedFileCount
                block['NumberOfEvents'] = acceptedEventCount

            validBlocks.append(block)
            if locations is None:
                locations = set(dbs.listFileBlockLocation(block['block']))
            else:
                locations = locations.intersection(dbs.listFileBlockLocation(block['block']))

        # all needed blocks present at these sites
        if self.wmspec.getTrustLocationFlag():
            self.data[datasetPath] = self.sites
        elif locations:
            self.data[datasetPath] = list(set(self.siteDB.PNNstoPSNs(locations)))

        return validBlocks
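
The run-filter branch above estimates surviving events from the fraction of accepted lumi sections. A minimal standalone sketch of that estimate (a hypothetical helper mirroring the loop logic, not part of the policy class; 'fileEntry' follows the DBS file-record fields used above):

def estimateAcceptedEvents(fileEntry, acceptedRuns):
    """Estimate events surviving a run filter, assuming events are spread
    evenly across the file's lumi sections."""
    acceptedLumis = sum(1 for lumi in fileEntry['LumiList']
                        if lumi['RunNumber'] in acceptedRuns)
    totalLumis = len(fileEntry['LumiList'])
    if acceptedLumis == totalLumis:
        return fileEntry['NumberOfEvents']
    return float(acceptedLumis) * fileEntry['NumberOfEvents'] / totalLumis

# A file with 4 lumis in run 1 and 4 in run 2, 800 events in total:
# keeping only run 1 gives an estimate of 4/8 * 800 = 400 events.
fileEntry = {'LumiList': [{'RunNumber': 1}] * 4 + [{'RunNumber': 2}] * 4,
             'NumberOfEvents': 800}
assert estimateAcceptedEvents(fileEntry, {1}) == 400.0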
Example #7
class RequestQuery:
    def __init__(self, config):
        self.br = Browser()

        self.config = config

        # Initialise connections
        self.mySiteDB = SiteDBJSON()
        self.dbsPhys01 = DbsApi(url=dbs_base_url + "phys01/DBSReader/")
        self.dbsPhys02 = DbsApi(url=dbs_base_url + "phys02/DBSReader/")
        self.dbsPhys03 = DbsApi(url=dbs_base_url + "phys03/DBSReader/")

    def __del__(self):
        self.br.close()

    def getScramArchByCMSSW(self):
        """
        From the list of available CMSSW releases,
        return a dictionary of ScramArchitecture by CMSSW release
        """

        # Open a temporary connection to the server and fetch the releases XML
        url = 'https://cmssdt.cern.ch/SDT/cgi-bin/ReleasesXML'
        br = Browser()
        br.set_handle_robots(False)
        response = br.open(url)
        soup = BeautifulSoup(response.read())

        # Dictionary form
        # {'CMSSW_X_X_X':[slc5_amd64_gcc472], ... }
        archByCmssw = {}

        # Fill the dictionary
        for arch in soup.find_all('architecture'):
            for cmssw in arch.find_all('project'):
                # CMSSW release
                cmsswLabel = cmssw.get('label').encode('ascii', 'ignore')
                if cmsswLabel not in archByCmssw:
                    archByCmssw[cmsswLabel] = []
                # ScramArch related to this CMSSW release
                archName = arch.get('name').encode('ascii', 'ignore')
                archByCmssw[cmsswLabel].append(archName)

        return archByCmssw

    def getDatasetOriginSites(self, dbs_url, data):
        """
        Get the origin sites for each block of the dataset.
        Returns the corresponding PSN list and the list of origin PNNs.
        """

        local_dbs = dbs_url.split('/')[5]
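        # e.g. 'https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader'.split('/')[5] == 'phys01'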
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listBlocks(detail=True, dataset=data)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listBlocks(detail=True, dataset=data)

        pnnList = set()
        for block in response:
            pnnList.add(block['origin_site_name'])
        psnList = self.mySiteDB.PNNstoPSNs(pnnList)

        return psnList, list(pnnList)

    def setGlobalTagFromOrigin(self, dbs_url, input_dataset):
        """
        Get the global tag of the dataset from the source DBS url;
        if it is not set, fall back to 'UNKNOWN'
        """

        globalTag = ""
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listOutputConfigs(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listOutputConfigs(dataset=input_dataset)

        globalTag = response[0]['global_tag']
        # GlobalTag cannot be empty
        if globalTag == '':
            globalTag = 'UNKNOWN'

        return globalTag

    def isDataAtUrl(self, dbs_url, input_dataset):
        """
        Returns True if the dataset is at the DBS url, False otherwise
        """
        local_dbs = dbs_url.split('/')[5]
        if local_dbs == 'phys01':
            response = self.dbsPhys01.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys02':
            response = self.dbsPhys02.listDatasets(dataset=input_dataset)
        elif local_dbs == 'phys03':
            response = self.dbsPhys03.listDatasets(dataset=input_dataset)
        # This means that the dataset is not at the url
        if not response:
            return False
        else:
            return True

    def getLabelByValueDict(self, control):
        """
        From control items, build a dictionary mapping values to labels
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[value] = label

        return d

    def getValueByLabelDict(self, control):
        """
        From control items, build a dictionary mapping labels to values
        """
        d = {}
        for item in control.items:
            value = item.attrs['value']
            label = item.attrs['label']
            d[label] = value

        return d

    def createRequestJSON(self,
                          ticket,
                          input_dataset,
                          dbs_url,
                          cmssw_release,
                          group_name,
                          version=1):
        """
        Creates a JSON file 'Ticket_#TICKET.json' with the information
        needed to create a request on ReqMgr.
        Input:
            - ticket: the ticket #, for instance 110773 on https://ggus.eu/?mode=ticket_info&ticket_id=110773
            - input_dataset
            - dbs_url: only the instance name, for example "phys01" for
             https://cmsweb.cern.ch/dbs/prod/phys01/DBSReader
            - cmssw_release
            - group_name: the physics group name
            - version: the dataset version, 1 by default.
        It returns a dictionary that contains the request information.
        """

        scramArchByCMSSW = self.getScramArchByCMSSW()
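        # NOTE (assumption): self.phedex is not created in __init__ above; it
        # must be a PhEDEx service handle initialised elsewhere in this class.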
        self.nodeMappings = self.phedex.getNodeMap()
        task = ticket
        print("Processing ticket: %s" % task)

        # Split the input dataset path into its components
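        # e.g. '/MyPrimary/MyProcessed-v1/USER'.split('/') gives
        #      ['', 'MyPrimary', 'MyProcessed-v1', 'USER']  (hypothetical name)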
        input_primary_dataset = input_dataset.split('/')[1].replace(' ', '')
        input_processed_dataset = input_dataset.split('/')[2].replace(' ', '')
        data_tier = input_dataset.split('/')[3].replace(' ', '')

        # Transform input value to a valid DBS url
        #dbs_url = "https://cmsweb.cern.ch/dbs/prod/"+dbs_url+"/DBSReader"
        dbs_url = dbs_base_url + dbs_url + "/DBSReader"
        release_id = cmssw_release

        # check if deprecated release was used
        release = cmssw_release
        # check whether the release has a matching ScramArch
        if release not in scramArchByCMSSW:
            raise Exception("Error on ticket %s due to ScramArch mismatch" % task)
        else:
            scram_arch = scramArchByCMSSW[release][-1]

        # check that the dataset is available at the DBS url
        try:
            data_at_url = self.isDataAtUrl(dbs_url, input_dataset)
        except Exception:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %
                            (task, input_dataset, dbs_url))

        if not data_at_url:
            raise Exception('Error on ticket %s, dataset %s not available at %s' %
                            (task, input_dataset, dbs_url))

        ## Get Physics Group
        group_squad = 'cms-storeresults-' + group_name.replace("-", "_").lower()

        ## Get Dataset Version
        dataset_version = str(version)

        # Set default Acquisition Era for StoreResults
        acquisitionEra = "StoreResults"

        ## Construct the new dataset name (ProcessingString):
        ## remove the leading HyperNews or physics group name and StoreResults+Version
        if input_processed_dataset.find(group_name) == 0:
            new_dataset = input_processed_dataset.replace(group_name, "", 1)
        else:
            stripped_dataset = input_processed_dataset.split("-")[1:]
            new_dataset = '_'.join(stripped_dataset)

        # Get dataset site info:
        psnList, pnnList = self.getDatasetOriginSites(dbs_url, input_dataset)

        infoDict = {}
        # Build store results json
        # First add all the defaults values
        infoDict["RequestType"] = "StoreResults"
        infoDict["UnmergedLFNBase"] = "/store/unmerged"
        infoDict["MergedLFNBase"] = "/store/results/" + group_name.replace(
            "-", "_").lower()
        infoDict["MinMergeSize"] = 1500000000
        infoDict["MaxMergeSize"] = 5000000000
        infoDict["MaxMergeEvents"] = 100000
        infoDict["TimePerEvent"] = 40
        infoDict["SizePerEvent"] = 512.0
        infoDict["Memory"] = 2394
        infoDict["CmsPath"] = "/uscmst1/prod/sw/cms"
        infoDict["Group"] = "DATAOPS"
        infoDict["DbsUrl"] = dbs_url

        # Add all the information pulled from Savannah
        infoDict["AcquisitionEra"] = acquisitionEra
        infoDict["GlobalTag"] = self.setGlobalTagFromOrigin(
            dbs_url, input_dataset)
        infoDict["DataTier"] = data_tier
        infoDict["InputDataset"] = input_dataset
        infoDict["ProcessingString"] = new_dataset
        infoDict["CMSSWVersion"] = release
        infoDict["ScramArch"] = scram_arch
        infoDict["ProcessingVersion"] = dataset_version
        infoDict["SiteWhitelist"] = psnList

        # Create report for Migration2Global
        report = {}

        # Write the JSON file with the request information
        self.writeJSONFile(task, infoDict)
        report["json"] = 'y'
        report["task"] = int(task)
        report["InputDataset"] = input_dataset
        report["ProcessingString"] = new_dataset
        report["localUrl"] = dbs_url
        report["sites"] = psnList
        report["pnns"] = pnnList

        return report

    def writeJSONFile(self, task, infoDict):
        """
        This writes a JSON file at ComponentDir
        """
        ## check if the file already exists
        filename = self.config["ComponentDir"] + '/Ticket_' + str(task) + '.json'
        if not os.access(filename, os.F_OK):
            jsonfile = open(filename, 'w')
            request = {'createRequest': infoDict}  ## CHECK THIS BEFORE FINISHING
            jsonfile.write(json.dumps(request, sort_keys=True, indent=4))
            jsonfile.close()

        return

    def removeJSONFile(self, task):
        """
        This removes the JSON file at ComponentDir if it was created
        """
        filename = self.config["ComponentDir"] + '/Ticket_' + str(
            task) + '.json'

        if os.access(filename, os.F_OK):
            os.remove(filename)
        return

    def printReport(self, report):
        """
        Print out a report
        """
        print("%20s %5s %10s %50s %50s" %
              ('Ticket', 'json', 'local DBS', 'Sites', 'pnns'))
        print("%20s %5s %10s %50s %50s" %
              ('-' * 20, '-' * 5, '-' * 10, '-' * 50, '-' * 50))

        json = report["json"]
        ticket = report["task"]
        #status = report["ticketStatus"]
        localUrl = report["localUrl"].split('/')[5]
        site = ', '.join(report["sites"])
        pnns = ', '.join(report["pnns"])
        print("%20s %5s %10s %50s %50s" % (ticket, json, localUrl, site, pnns))
Example #8
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """

    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        EmulatorHelper.setEmulators(phedex=False, dbs=False, siteDB=False, requestMgr=True)
        self.mySiteDB = SiteDBJSON()


    def tearDown(self):
        """
        _tearDown_
        """
        super(SiteDBTest, self).tearDown()
        EmulatorHelper.resetEmulators()
        return


    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertTrue(results == target)
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testCheckAndConvertSENameToPNN(self):
        """
        Test the conversion of SE name to PNN for single
        and multiple sites/PNNs using checkAndConvertSENameToPNN
        """

        fnalSE = u'cmsdcadisk01.fnal.gov'
        purdueSE = u'srm.rcac.purdue.edu'
        fnalPNNs = [u'T1_US_FNAL_Buffer', u'T1_US_FNAL_MSS', u'T1_US_FNAL_Disk']
        purduePNN = [u'T2_US_Purdue']

        pnnList = fnalPNNs + purduePNN
        seList = [fnalSE, purdueSE]

        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(fnalSE), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([fnalSE]), fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(purdueSE), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN([purdueSE]), purduePNN)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(seList), purduePNN + fnalPNNs)
        self.assertItemsEqual(self.mySiteDB.checkAndConvertSENameToPNN(pnnList), pnnList)
        return

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """

        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
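
The two PNNstoPSNs cases above pin down the expected mapping. As a rough illustration only, here is a hypothetical stand-in that reproduces exactly these cases (the real SiteDBJSON resolves PNN-to-PSN through the SiteDB service, not by string manipulation):

def pnnsToPsns(pnns):
    """Collapse PhEDEx node names to processing site names (illustrative only)."""
    psns = set()
    for pnn in pnns:
        if pnn.endswith('_Tape'):  # tape endpoints map to no processing site
            continue
        for suffix in ('_Disk', '_Buffer', '_MSS'):
            if pnn.endswith(suffix):
                pnn = pnn[:-len(suffix)]
                break
        psns.add(pnn)
    return sorted(psns)

assert pnnsToPsns(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']) == ['T1_US_FNAL']
assert pnnsToPsns(['T2_UK_London_IC', 'T2_US_Purdue']) == ['T2_UK_London_IC', 'T2_US_Purdue']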
Example #9
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """
    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests cmsNametoPhEDExNode
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testSEtoCmsName(self):
        """
        Tests seToCMSName
        """
        target = [u'T1_US_FNAL', u'T1_US_FNAL_Disk']
        results = self.mySiteDB.seToCMSName("cmsdcadisk01.fnal.gov")
        self.assertTrue(results == target)
        target = sorted([u'T2_CH_CERN', u'T2_CH_CERN_HLT'])
        results = sorted(self.mySiteDB.seToCMSName("srm-eoscms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T0_CH_CERN', u'T1_CH_CERN'])
        results = sorted(self.mySiteDB.seToCMSName("srm-cms.cern.ch"))
        self.assertItemsEqual(results, target)
        target = sorted([u'T2_CH_CERN_AI'])
        results = sorted(self.mySiteDB.seToCMSName("eoscmsftp.cern.ch"))
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """

        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return
Example #10
class SiteDBTest(EmulatedUnitTestCase):
    """
    Unit tests for SiteScreening module
    """
    def __init__(self, methodName='runTest'):
        super(SiteDBTest, self).__init__(methodName=methodName)

    def setUp(self):
        """
        Setup for unit tests
        """
        super(SiteDBTest, self).setUp()
        self.mySiteDB = SiteDBJSON()

    def testCmsNametoPhEDExNode(self):
        """
        Tests CMS Name to PhEDEx Node Name
        """
        target = ['T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS']
        results = self.mySiteDB.cmsNametoPhEDExNode('T1_US_FNAL')
        self.assertItemsEqual(results, target)

    def testDNUserName(self):
        """
        Tests DN to Username lookup
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=jha/CN=618566/CN=Manoj Jha"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testDNWithApostrophe(self):
        """
        Tests a DN with an apostrophe in it - will fail till SiteDB2 appears
        """
        testDn = "/DC=ch/DC=cern/OU=Organic Units/OU=Users/CN=liviof/CN=472739/CN=Livio Fano'"
        testUserName = "******"
        userName = self.mySiteDB.dnUserName(dn=testDn)
        self.assertTrue(testUserName == userName)

    def testSEFinder(self):
        """
        _testSEFinder_

        See if we can retrieve seNames from all sites
        """

        seNames = self.mySiteDB.getAllSENames()
        self.assertTrue(len(seNames) > 1)
        self.assertTrue('cmsdcadisk01.fnal.gov' in seNames)
        return

    def testPNNtoPSN(self):
        """
        _testPNNtoPSN_

        Test converting PhEDEx Node Name to Processing Site Name
        """

        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Disk')
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNtoPSN('T1_US_FNAL_Tape')
        self.assertTrue(result == [])
        result = self.mySiteDB.PNNtoPSN('T2_UK_London_IC')
        self.assertTrue(result == ['T2_UK_London_IC'])
        return

    def testCMSNametoList(self):
        """
        Test PNN to storage list
        """
        result = self.mySiteDB.cmsNametoList("T1_US*", "SE")
        self.assertItemsEqual(result, [u'cmsdcadisk01.fnal.gov'])

    def testPNNstoPSNs(self):
        """
        _testPNNstoPSNs_

        Test converting PhEDEx Node Names to Processing Site Names
        """
        result = self.mySiteDB.PNNstoPSNs(['T1_US_FNAL_Disk', 'T1_US_FNAL_Buffer', 'T1_US_FNAL_MSS'])
        self.assertTrue(result == ['T1_US_FNAL'])
        result = self.mySiteDB.PNNstoPSNs(['T2_UK_London_IC', 'T2_US_Purdue'])
        self.assertItemsEqual(result, ['T2_UK_London_IC', 'T2_US_Purdue'])
        return

    def testPSNtoPNNMap(self):
        """
        _PSNtoPNNMap_

        Test API to get a map of PSNs and PNNs 
        """
        result = self.mySiteDB.PSNtoPNNMap()
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T1_')])
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T2_')])
        self.assertTrue([psn for psn in result.keys() if psn.startswith('T3_')])
        self.assertTrue(len(result) > 50)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T1.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T1_')])
        self.assertTrue(len(result) < 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T2.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T2_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.PSNtoPNNMap(psnPattern='T3.*')
        self.assertFalse([psn for psn in result.keys() if not psn.startswith('T3_')])
        self.assertTrue(len(result) > 10)

        return

    def testGetAllPhEDExNodeNames(self):
        """
        _testGetAllPhEDExNodeNames_

        Test API to get all PhEDEx Node Names 
        """
        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if pnn.endswith('_Buffer')])

        result = self.mySiteDB.getAllPhEDExNodeNames(excludeBuffer=False)
        self.assertTrue(len([pnn for pnn in result if pnn.endswith('_Buffer')]) > 5)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='T1.*', excludeBuffer=True)
        self.assertFalse([pnn for pnn in result if not pnn.startswith('T1_')])
        self.assertTrue(len(result) > 10)

        result = self.mySiteDB.getAllPhEDExNodeNames(pattern='.*', excludeBuffer=True)
        self.assertTrue([pnn for pnn in result if pnn.startswith('T1_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T2_')])
        self.assertTrue([pnn for pnn in result if pnn.startswith('T3_')])
        self.assertTrue(len(result) > 60)

        return
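
For reference, the psnPattern arguments above behave like anchored regular expressions. A minimal sketch of that filtering (a hypothetical helper; the real PSNtoPNNMap queries SiteDB and applies the pattern itself):

import re

def filterPsnMap(psnToPnnMap, psnPattern='.*'):
    """Keep only PSN entries whose name matches psnPattern from the start."""
    matcher = re.compile(psnPattern)
    return {psn: pnns for psn, pnns in psnToPnnMap.items() if matcher.match(psn)}

sampleMap = {'T1_US_FNAL': ['T1_US_FNAL_Disk'], 'T2_US_Purdue': ['T2_US_Purdue']}
assert filterPsnMap(sampleMap, 'T1.*') == {'T1_US_FNAL': ['T1_US_FNAL_Disk']}
assert filterPsnMap(sampleMap) == sampleMap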