def getBlockReplicasAndSizeRucio(datasets, rucioUrl, rucioToken, scope="cms"):
    """
    Given a list of datasets, find all their blocks with replicas available.
    :param datasets: list of dataset names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    dsetBlockSize = {}
    if not datasets:
        return dsetBlockSize
    headers = {"X-Rucio-Auth-Token": rucioToken}

    # first, figure out their block names
    blocksByDset = getContainerBlocksRucio(datasets, rucioUrl, rucioToken, scope=scope)
    urls = []
    for _dset, blocks in blocksByDset.items():
        for block in blocks:
            urls.append('{}/replicas/{}/{}/datasets'.format(rucioUrl, scope, quote(block)))

    # next, query the replicas API for the block location
    # this is going to be bloody expensive in terms of HTTP requests
    logging.info("Executing %d requests against Rucio replicas API for blocks", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        block = row['url'].split("/{}/".format(scope))[1]
        block = unquote(re.sub("/datasets$", "", block, 1))
        container = block.split("#")[0]
        dsetBlockSize.setdefault(container, dict())
        if row['data'] is None:
            msg = "Failure in getBlockReplicasAndSizeRucio for container {} and block {}.".format(container, block)
            msg += " Response: {}".format(row)
            logging.error(msg)
            dsetBlockSize[container] = None
            continue
        if dsetBlockSize[container] is None:
            # then one of the block requests failed, skip the whole dataset
            continue
        thisBlockRSEs = []
        blockBytes = 0
        for item in parseNewLineJson(row['data']):
            blockBytes = item['bytes']
            if item['state'] == "AVAILABLE":
                thisBlockRSEs.append(item["rse"])
        # now we have the final block location
        if not blockBytes and not thisBlockRSEs:
            logging.warning("Block: %s has no replicas and no size", block)
        else:
            dsetBlockSize[container][block] = {"locations": thisBlockRSEs, "blockSize": blockBytes}
    return dsetBlockSize
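# --- Usage sketch (illustrative only, not part of the original code) ---
# Shows how the nested output of getBlockReplicasAndSizeRucio may be consumed.
# The container name, Rucio URL and token below are hypothetical placeholders.
def _exampleBlockReplicasRucio():
    token = "dummy-rucio-token"  # assumption: a valid token is obtained elsewhere
    result = getBlockReplicasAndSizeRucio(["/Prim/Proc-v1/AODSIM"],
                                          "http://cms-rucio.cern.ch", token)
    for cont, blocks in result.items():
        if blocks is None:
            print("Data-service failure for container: %s" % cont)
            continue
        for blockName, info in blocks.items():
            print("%s: %s bytes at %s" % (blockName, info['blockSize'], info['locations']))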
def dbsInfo(datasets):
    "Provides DBS info about dataset blocks"
    urls = ['%s/blocks?detail=True&dataset=%s' % (dbsUrl(), d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    datasetBlocks = {}
    datasetSizes = {}
    for row in data:
        dataset = row['url'].split('=')[-1]
        rows = json.loads(row['data'])
        blocks = []
        size = 0
        for item in rows:
            blocks.append(item['block_name'])
            size += item['block_size']
        datasetBlocks[dataset] = blocks
        datasetSizes[dataset] = size
    return datasetBlocks, datasetSizes
def getContainerBlocksRucio(containers, rucioUrl, rucioToken, scope="cms"):
    """
    Provided a list of containers, find all their blocks.
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary key'ed by the datasets with a list of blocks.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    blocksByDset = {}
    if not containers:
        return blocksByDset
    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = ['{}/dids/{}/{}/dids'.format(rucioUrl, scope, cont) for cont in containers]
    logging.info("Executing %d requests against Rucio DIDs API for blocks in containers", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = row['url'].split("/{}/".format(scope))[1]
        container = re.sub("/dids$", "", container, 1)
        blocksByDset.setdefault(container, [])
        if not row['data']:
            # nothing to parse; avoid feeding empty/None data to parseNewLineJson
            logging.warning("Dataset: %s has no blocks in Rucio", container)
            continue
        for item in parseNewLineJson(row['data']):
            blocksByDset[container].append(item["name"])
    return blocksByDset
def getBlocksByDsetAndRun(datasetName, runList, dbsUrl):
    """
    Given a dataset name and a list of runs, find all the blocks
    :return: flat list of blocks
    """
    blocks = set()
    if isinstance(runList, set):
        runList = list(runList)

    urls = ['%s/blocks?run_num=%s&dataset=%s' % (dbsUrl, str(runList).replace(" ", ""), datasetName)]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            msg = "Failure in getBlocksByDsetAndRun for %s. Error: %s %s" % (dataset, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        for item in rows:
            blocks.add(item['block_name'])
    return list(blocks)
def getFileLumisInBlock(blocks, dbsUrl, validFileOnly=1):
    """
    Given a list of blocks, find their file run lumi information
    in DBS for up to 10 blocks concurrently
    :param blocks: list of block names
    :param dbsUrl: string with the DBS URL
    :param validFileOnly: integer flag for valid files only or not
    :return: a dict of blocks with list of file/run/lumi info
    """
    runLumisByBlock = {}
    urls = ['%s/filelumis?validFileOnly=%d&block_name=%s' % (dbsUrl, validFileOnly, quote(b)) for b in blocks]
    # limit it to 10 concurrent calls not to overload DBS
    logging.info("Executing %d requests against DBS 'filelumis' API, concurrency limited to 10", len(urls))
    data = multi_getdata(urls, ckey(), cert(), num_conn=10)
    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        if row['data'] is None:
            msg = "Failure in getFileLumisInBlock for block %s. Error: %s %s" % (blockName, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        runLumisByBlock.setdefault(blockName, [])
        for item in rows:
            runLumisByBlock[blockName].append(item)
    return runLumisByBlock
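# Usage sketch (hypothetical block name and DBS endpoint, not part of the
# original code): each value is a list of DBS file/run/lumi records.
def _exampleFileLumis():
    dbs = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"  # assumption
    block = "/Prim/Proc-v1/AODSIM#abc-123"  # hypothetical block name
    for blk, fileLumis in getFileLumisInBlock([block], dbs).items():
        print("%s has %d file/run/lumi records" % (blk, len(fileLumis)))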
def getBlockReplicasAndSize(datasets, phedexUrl, group=None):
    """
    Given a list of datasets, find all their blocks with replicas
    available (thus blocks with at least 1 valid file), completed
    and subscribed.
    If PhEDEx group is provided, make sure it's subscribed under that
    same group.
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :param group: optional PhEDEx group name
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    dsetBlockSize = {}
    if not datasets:
        return dsetBlockSize

    urls = ['%s/blockreplicas?dataset=%s' % (phedexUrl, dset) for dset in datasets]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("Failure in getBlockReplicasAndSize for dataset %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            dsetBlockSize.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        dsetBlockSize.setdefault(dataset, {})
        try:
            for item in rows['phedex']['block']:
                block = {item['name']: {'blockSize': item['bytes'], 'locations': []}}
                for repli in item['replica']:
                    if repli['complete'] == 'y' and repli['subscribed'] == 'y':
                        if not group:
                            block[item['name']]['locations'].append(repli['node'])
                        elif repli['group'] == group:
                            block[item['name']]['locations'].append(repli['node'])
                dsetBlockSize[dataset].update(block)
        except Exception as exc:
            print("Failure in getBlockReplicasAndSize for dataset %s. Error: %s" % (dataset, str(exc)))
            dsetBlockSize[dataset] = None
    return dsetBlockSize
def findParent(datasets, dbsUrl):
    """
    Helper function to find the parent dataset.
    It returns a dictionary key'ed by the child dataset
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    parentByDset = {}
    if not datasets:
        return parentByDset

    urls = ['%s/datasetparents?dataset=%s' % (dbsUrl, d) for d in datasets]
    logging.info("Executing %d requests against DBS 'datasetparents' API", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("Failure in findParent for dataset %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            parentByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        try:
            for item in rows:
                parentByDset[item['this_dataset']] = item['parent_dataset']
        except Exception as exc:
            print("Failure in findParent for dataset %s. Error: %s" % (dataset, str(exc)))
            parentByDset[dataset] = None
    return parentByDset
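# Usage sketch (hypothetical dataset and DBS endpoint, not part of the
# original code): the result maps each child dataset to its parent.
def _exampleFindParent():
    dbs = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"  # assumption
    for child, parent in findParent(["/Prim/Proc-v1/AODSIM"], dbs).items():
        # parent is None if the data-service request failed
        print("parent of %s: %s" % (child, parent))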
def getBlocksByDsetAndRun(datasetName, runList, dbsUrl):
    """
    Given a dataset name and a list of runs, find all the blocks
    :return: flat list of blocks
    """
    blocks = set()
    if isinstance(runList, set):
        runList = list(runList)

    urls = []
    for runSlice in grouper(runList, 50):
        urls.append('%s/blocks?run_num=%s&dataset=%s' % (dbsUrl, str(runSlice).replace(" ", ""), datasetName))
    logging.info("Executing %d requests against DBS 'blocks' API, with run_num list", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            msg = "Failure in getBlocksByDsetAndRun for %s. Error: %s %s" % (dataset, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        for item in rows:
            blocks.add(item['block_name'])
    return list(blocks)
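# Illustration (not part of the original code) of the run-slicing done above.
# Assuming `grouper` yields fixed-size chunks, as in the WMCore utilities, a
# 120-run list results in three DBS calls: runs [0:50], [50:100] and [100:120].
# A minimal stand-in with the same chunking behavior:
def _chunked(sequence, size):
    "Yield successive `size`-long slices of a sequence (stand-in for grouper)"
    for idx in range(0, len(sequence), size):
        yield sequence[idx:idx + size]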
def dbsInfo(datasets, dbsUrl):
    "Provides DBS info about dataset blocks"
    datasetBlocks = {}
    datasetSizes = {}
    datasetTransfers = {}
    if not datasets:
        return datasetBlocks, datasetSizes, datasetTransfers

    urls = ['%s/blocks?detail=True&dataset=%s' % (dbsUrl, d) for d in datasets]
    logging.info("Executing %d requests against DBS 'blocks' API, with details", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("FAILURE: dbsInfo for %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        blocks = []
        size = 0
        # flat dict in the format of blockName: blockSize
        datasetTransfers.setdefault(dataset, {})
        for item in rows:
            blocks.append(item['block_name'])
            size += item['block_size']
            datasetTransfers[dataset].update({item['block_name']: item['block_size']})
        datasetBlocks[dataset] = blocks
        datasetSizes[dataset] = size
    return datasetBlocks, datasetSizes, datasetTransfers
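# Usage sketch (hypothetical dataset and DBS endpoint, not part of the
# original code): the three returned dictionaries are unpacked together.
def _exampleDbsInfo():
    dbs = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"  # assumption
    blocks, sizes, transfers = dbsInfo(["/Prim/Proc-v1/AODSIM"], dbs)
    for dset in blocks:
        print("%s: %d blocks, %d bytes" % (dset, len(blocks[dset]), sizes[dset]))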
def _getRequestSpecs(self, requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/%s/spec' % (self.msConfig['reqmgrCacheUrl'], r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-2]
        rdict[req] = pickle.loads(row['data'])
    return rdict
def getRequestSpecs(requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/%s/spec' % (reqmgrCacheUrl(), r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-2]
        rdict[req] = pickle.loads(row['data'])
    return rdict
def getPileupSubscriptionsRucio(datasets, rucioUrl, rucioToken, scope="cms"):
    """
    Provided a list of datasets, find the locations hosting an available
    replica of every block in the dataset (intersection of block locations).
    :param datasets: list of dataset names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary of datasets and a set of their locations.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    # FIXME: we should definitely make a feature request to Rucio...
    # so much, just to get the final RSEs for a container!!!
    locationByDset = {}
    if not datasets:
        return locationByDset
    headers = {"X-Rucio-Auth-Token": rucioToken}

    # first, resolve the dataset into blocks
    blocksByDset = getContainerBlocksRucio(datasets, rucioUrl, rucioToken, scope)
    urls = []
    for _dset, blocks in blocksByDset.items():
        if blocks:
            for block in blocks:
                urls.append('{}/replicas/{}/{}/datasets'.format(rucioUrl, scope, quote(block)))

    # this is going to be bloody expensive in terms of HTTP requests
    logging.info("Executing %d requests against Rucio replicas API for blocks", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        block = row['url'].split("/{}/".format(scope))[1]
        block = unquote(re.sub("/datasets$", "", block, 1))
        container = block.split("#")[0]
        locationByDset.setdefault(container, set())
        if row['data'] is None:
            msg = "Failure in getPileupSubscriptionsRucio container {} and block {}.".format(container, block)
            msg += " Response: {}".format(row)
            logging.error(msg)
            locationByDset[container] = None
            continue
        if locationByDset[container] is None:
            # then one of the block requests failed, skip the whole dataset
            continue
        thisBlockRSEs = set()
        for item in parseNewLineJson(row['data']):
            if item['state'] == "AVAILABLE":
                thisBlockRSEs.add(item["rse"])
        logging.info("Block: %s is available at: %s", block, thisBlockRSEs)
        # now we have the final block location
        if not locationByDset[container]:
            # then this is the first block of this dataset
            locationByDset[container] = thisBlockRSEs
        else:
            # otherwise, make an intersection of them
            locationByDset[container] = locationByDset[container] & thisBlockRSEs
    return locationByDset
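# Minimal illustration (pure Python, no HTTP; not part of the original code)
# of the intersection step above: the final container location is the set of
# RSEs hosting *every* one of its blocks.
def _commonRSEs(rsesPerBlock):
    "rsesPerBlock: list of sets of RSE names, one set per block"
    return set.intersection(*rsesPerBlock) if rsesPerBlock else set()

# e.g. _commonRSEs([{"T1_US_FNAL_Disk", "T2_CH_CERN"}, {"T2_CH_CERN"}])
#      returns {"T2_CH_CERN"}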
def phedexInfo(datasets):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = ['%s/blockreplicasummary?dataset=%s' % (phedexUrl(), d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [r['node'] for r in item['replica'] if r['complete'] == 'y']
            blockNodes[item['name']] = nodes
    return blockNodes
def getBlocksAndSizeRucio(containers, rucioUrl, rucioToken, scope="cms"):
    """
    Given a list of containers, find all their correspondent blocks and their sizes.
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a dictionary in the form of:
    {"dataset":
        {"block":
            {"blockSize": 111, "locations": ["x", "y"]}
        }
    }
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    NOTE2: meant to return an output similar to Common.getBlockReplicasAndSize
    """
    contBlockSize = {}
    if not containers:
        return contBlockSize
    headers = {"X-Rucio-Auth-Token": rucioToken}

    urls = []
    for cont in containers:
        ### FIXME: the long attribute value type has recently changed from integer to boolean
        ### see PR: https://github.com/rucio/rucio/pull/3949 , which went in the 1.23.5 series
        ### we need to make sure CMS production Rucio will be running that version once MicroServices
        ### get deployed to CMSWEB
        urls.append('{}/dids/{}/dids/search?type=dataset&long=True&name={}'.format(rucioUrl, scope, quote(cont + "#*")))
    logging.info("Executing %d requests against Rucio DIDs search API for containers", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = row['url'].split("name=")[1]
        container = unquote(container).replace("#*", "")
        contBlockSize.setdefault(container, {})
        if row['data'] in [None, ""]:
            msg = "Failure in getBlocksAndSizeRucio function for container {}. Response: {}".format(container, row)
            logging.error(msg)
            contBlockSize[container] = None
            continue
        for item in parseNewLineJson(row['data']):
            # NOTE: we do not care about primary block location in Rucio
            contBlockSize[container][item['name']] = {"blockSize": item['bytes'], "locations": []}
    return contBlockSize
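# Usage sketch (hypothetical container name, not part of the original code).
# Unlike the PhEDEx variant, "locations" is always an empty list here.
def _exampleBlocksAndSizeRucio():
    token = "dummy-rucio-token"  # assumption: a valid token is obtained elsewhere
    data = getBlocksAndSizeRucio(["/Prim/Proc-v1/PREMIX"],
                                 "http://cms-rucio.cern.ch", token)
    for cont, blocks in data.items():
        if blocks is None:
            continue  # data-service failure for this container
        totalSize = sum(info["blockSize"] for info in blocks.values())
        print("%s: %d blocks, %d bytes" % (cont, len(blocks), totalSize))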
def getRequestWorkflows(requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/data/request/%s' % (reqmgrUrl(), r)) for r in requestNames]
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-1]
        try:
            data = json.loads(row['data'])
            rdict[req] = data['result'][0]  # we get back {'result': [workflow]} dict
        except Exception as exp:
            print("ERROR: fail to load data as json record, error=%s" % str(exp))
            print(row)
    return rdict
def phedexInfo(datasets, phedexUrl):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = ['%s/blockreplicasummary?dataset=%s' % (phedexUrl, d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [r['node'] for r in item['replica'] if r['complete'] == 'y']
            blockNodes[item['name']] = nodes
    return blockNodes
def getPileupSubscriptions(datasets, phedexUrl, group=None, percentMin=99):
    """
    Provided a list of datasets, find dataset level subscriptions
    that are at least as complete as `percentMin`.
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :param group: optional string with the PhEDEx group
    :param percentMin: only return subscriptions that are this complete
    :return: a dictionary of datasets and a list of their locations.
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    locationByDset = {}
    if not datasets:
        return locationByDset

    if group:
        url = "%s/subscriptions?group=%s" % (phedexUrl, group)
        url += "&percent_min=%s&dataset=%s"
    else:
        url = "%s/subscriptions?" % phedexUrl
        url += "percent_min=%s&dataset=%s"
    urls = [url % (percentMin, dset) for dset in datasets]

    logging.info("Executing %d requests against PhEDEx 'subscriptions' API", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            print("Failure in getPileupSubscriptions for dataset %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            locationByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        locationByDset.setdefault(dataset, [])
        try:
            for item in rows['phedex']['dataset']:
                for subs in item['subscription']:
                    locationByDset[dataset].append(subs['node'])
        except Exception as exc:
            print("Failure in getPileupSubscriptions for dataset %s. Error: %s" % (dataset, str(exc)))
            locationByDset[dataset] = None
    return locationByDset
def getPileupContainerSizesRucio(containers, rucioUrl, rucioToken, scope="cms"):
    """
    Given a list of containers, find their total size in Rucio
    :param containers: list of container names
    :param rucioUrl: a string with the Rucio URL
    :param rucioToken: a string with the user rucio token
    :param scope: a string with the Rucio scope of our data
    :return: a flat dictionary of containers and their respective sizes
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    NOTE: Rucio version of getPileupDatasetSizes()
    """
    sizeByDset = {}
    if not containers:
        return sizeByDset
    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = ['{}/dids/{}/{}?dynamic=anything'.format(rucioUrl, scope, cont) for cont in containers]
    logging.info("Executing %d requests against Rucio for the container size", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = row['url'].split('/dids/{}/'.format(scope))[1]
        container = container.replace("?dynamic=anything", "")
        if row['data'] is None:
            msg = "Failure in getPileupContainerSizesRucio for container {}. Response: {}".format(container, row)
            logging.error(msg)
            sizeByDset.setdefault(container, None)
            continue
        response = json.loads(row['data'])
        try:
            sizeByDset.setdefault(container, response['bytes'])
        except KeyError:
            msg = "getPileupContainerSizesRucio function did not return a valid response for container: %s. Error: %s"
            logging.error(msg, container, response)
            sizeByDset.setdefault(container, None)
            continue
    return sizeByDset
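# Usage sketch (hypothetical container name, not part of the original code):
def _exampleContainerSizes():
    token = "dummy-rucio-token"  # assumption: a valid token is obtained elsewhere
    sizes = getPileupContainerSizesRucio(["/Prim/Proc-v1/PREMIX"],
                                         "http://cms-rucio.cern.ch", token)
    for cont, size in sizes.items():
        print("%s: %s bytes" % (cont, size))  # size is None on failure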
def _getRequestWorkflows(self, requestNames):
    "Helper function to get all specs for given set of request names"
    urls = [str('%s/data/request/%s' % (self.msConfig['reqmgr2Url'], r)) for r in requestNames]
    self.logger.debug("getRequestWorkflows")
    for u in urls:
        self.logger.debug("url %s", u)
    data = multi_getdata(urls, ckey(), cert())
    rdict = {}
    for row in data:
        req = row['url'].split('/')[-1]
        try:
            data = json.loads(row['data'])
            rdict[req] = data['result'][0]  # we get back {'result': [workflow]} dict
        except Exception as exp:
            self.logger.error("fail to process row %s", row)
            self.logger.exception("fail to load data as json record, error=%s", str(exp))
    return rdict
def getRunsInBlock(blocks, dbsUrl):
    """
    Provided a list of block names, find their run numbers
    :param blocks: list of block names
    :param dbsUrl: string with the DBS URL
    :return: a dictionary of block names and a list of run numbers
    """
    runsByBlock = {}
    urls = ['%s/runs?block_name=%s' % (dbsUrl, quote(b)) for b in blocks]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        if row['data'] is None:
            msg = "Failure in getRunsInBlock for block %s. Error: %s %s" % (blockName, row.get('code'), row.get('error'))
            raise RuntimeError(msg)
        rows = json.loads(row['data'])
        runsByBlock[blockName] = rows[0]['run_num']
    return runsByBlock
def eventsLumisInfo(inputs, dbsUrl, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]:  # inputs are list of blocks
        what = 'block_name'
    urls = ['%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s'
            % (dbsUrl, validFileOnly, sumOverLumi, what, urllib.quote(i))
            for i in inputs]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        key = row['url'].split('=')[-1]
        if what == 'block_name':
            key = urllib.unquote(key)
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
def phedexInfo(datasets, phedexUrl):
    "Fetch PhEDEx info about nodes for all datasets"
    urls = ['%s/blockreplicasummary?dataset=%s' % (phedexUrl, d) for d in datasets]
    data = multi_getdata(urls, ckey(), cert())
    blockNodes = {}
    for row in data:
        dataset = row['url'].rsplit('=')[-1]
        if row['data'] is None:
            print("FAILURE: phedexInfo for %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows['phedex']['block']:
            nodes = [r['node'] for r in item['replica'] if r['complete'] == 'y']
            blockNodes[item['name']] = nodes
    return blockNodes
def eventsLumisInfo(inputs, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]:  # inputs are list of blocks
        what = 'block_name'
    urls = ['%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s'
            % (dbsUrl(), validFileOnly, sumOverLumi, what, urllib.quote(i))
            for i in inputs]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        key = row['url'].split('=')[-1]
        if what == 'block_name':
            key = urllib.unquote(key)
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
def getPileupDatasetSizes(datasets, phedexUrl):
    """
    Given a list of datasets, find all their blocks with replicas
    available, i.e., blocks that have valid files to be processed,
    and calculate the total dataset size
    :param datasets: list of dataset names
    :param phedexUrl: a string with the PhEDEx URL
    :return: a dictionary of datasets and their respective sizes
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    sizeByDset = {}
    if not datasets:
        return sizeByDset

    urls = ['%s/blockreplicas?dataset=%s' % (phedexUrl, dset) for dset in datasets]
    logging.info("Executing %d requests against PhEDEx 'blockreplicas' API", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        dataset = row['url'].split('=')[-1]
        if row['data'] is None:
            print("Failure in getPileupDatasetSizes for dataset %s. Error: %s %s" % (dataset, row.get('code'), row.get('error')))
            sizeByDset.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        sizeByDset.setdefault(dataset, 0)
        try:
            for item in rows['phedex']['block']:
                sizeByDset[dataset] += item['bytes']
        except Exception as exc:
            print("Failure in getPileupDatasetSizes for dataset %s. Error: %s" % (dataset, str(exc)))
            sizeByDset[dataset] = None
    return sizeByDset
def findBlockParents(blocks, dbsUrl):
    """
    Helper function to find block parents given a list of block names.
    Return a dictionary in the format of:
    {"child dataset name": {"child block": ["parent blocks"],
                            "child block": ["parent blocks"], ...}}
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    parentsByBlock = {}
    urls = ['%s/blockparents?block_name=%s' % (dbsUrl, quote(b)) for b in blocks]
    logging.info("Executing %d requests against DBS 'blockparents' API", len(urls))
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        blockName = unquote(row['url'].rsplit('=')[-1])
        dataset = blockName.split("#")[0]
        if row['data'] is None:
            print("Failure in findBlockParents for block %s. Error: %s %s" % (blockName, row.get('code'), row.get('error')))
            parentsByBlock.setdefault(dataset, None)
            continue
        rows = json.loads(row['data'])
        try:
            if dataset in parentsByBlock and parentsByBlock[dataset] is None:
                # then one of the block calls has failed, keep it failed!
                continue
            parentsByBlock.setdefault(dataset, {})
            for item in rows:
                parentsByBlock[dataset].setdefault(item['this_block_name'], set())
                parentsByBlock[dataset][item['this_block_name']].add(item['parent_block_name'])
        except Exception as exc:
            print("Failure in findBlockParents for block %s. Error: %s" % (blockName, str(exc)))
            parentsByBlock[dataset] = None
    return parentsByBlock
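# Usage sketch (hypothetical block name, not part of the original code): the
# result is keyed by the *child* dataset, mapping each child block to a set
# of parent block names.
def _exampleBlockParents():
    dbs = "https://cmsweb.cern.ch/dbs/prod/global/DBSReader"  # assumption
    parentage = findBlockParents(["/Prim/Proc-v1/AODSIM#abc-123"], dbs)
    for dset, byBlock in parentage.items():
        if byBlock is None:
            continue  # at least one block lookup failed for this dataset
        for childBlock, parentBlocks in byBlock.items():
            print("%s <- %s" % (childBlock, sorted(parentBlocks)))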
def eventsLumisInfo(inputs, dbsUrl, validFileOnly=0, sumOverLumi=0):
    "Get information about events and lumis for given set of inputs: blocks or datasets"
    what = 'dataset'
    eventsLumis = {}
    if not inputs:
        return eventsLumis
    if '#' in inputs[0]:  # inputs are list of blocks
        what = 'block_name'
    urls = ['%s/filesummaries?validFileOnly=%s&sumOverLumi=%s&%s=%s'
            % (dbsUrl, validFileOnly, sumOverLumi, what, quote(i))
            for i in inputs]
    data = multi_getdata(urls, ckey(), cert())
    for row in data:
        # do not shadow the `data` generator: keep the dataset/block key separate
        key = unquote(row['url'].split('=')[-1])
        if row['data'] is None:
            print("FAILURE: eventsLumisInfo for %s. Error: %s %s" % (key, row.get('code'), row.get('error')))
            continue
        rows = json.loads(row['data'])
        for item in rows:
            eventsLumis[key] = item
    return eventsLumis
def fetch(self):
    "Fetch information about sites from various CMS data-services"
    tfile = tempfile.NamedTemporaryFile()
    dashboardUrl = "http://dashb-ssb.cern.ch/dashboard/request.py"
    urls = [
        '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl,
        '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl,
        ### FIXME: these calls to gwmsmon are failing pretty badly with
        ### "302 Found" and failing to decode, causing a huge error dump
        ### to the logs
        # 'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
        # 'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
        # 'https://cms-gwmsmon.cern.ch/poolview/json/totals',
        # 'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
        'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
        'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
        'http://cmsmonitoring.web.cern.ch/cmsmonitoring/storageoverview/latest/StorageOverview.json',
    ]
    cookie = {}
    ssbids = ['106', '107', '108', '109', '136', '158', '159', '160', '237']
    sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
    for url in urls:
        if 'gwmsmon' in url:
            cern_sso_cookie(url, tfile.name, cert(), ckey())
            cookie.update({url: tfile.name})
    gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
    siteInfo = {}
    for row in gen:
        if 'Detox' in row['url']:
            data = row['data']
        else:
            try:
                data = json.loads(row['data'])
            except Exception as exc:
                self.logger.exception('error %s for row %s', str(exc), row)
                data = {}
        if 'ssb' in row['url']:
            for ssbid in ssbids:
                if ssbid in row['url']:
                    siteInfo['ssb_%s' % ssbid] = data
        elif 'prodview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_prod_site_summary'] = data
        elif 'totalview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_site_summary'] = data
        elif 'totals' in row['url']:
            siteInfo['gwmsmon_totals'] = data
        elif 'maxusedcpus' in row['url']:
            siteInfo['gwmsmon_prod_maxused'] = data
        elif 'mcore' in row['url']:
            siteInfo['mcore'] = data
        elif 'Detox' in row['url']:
            siteInfo['detox_sites'] = data
        elif 'monitoring' in row['url']:
            siteInfo['mss_usage'] = data
        elif 'stuck' in row['url']:
            for sid in sids:
                if sid in row['url']:
                    siteInfo['stuck_%s' % sid] = data
    siteInfo['site_queues'] = getNodeQueues()
    return siteInfo
def fetch(self):
    "Fetch information about sites from various CMS data-services"
    tfile = tempfile.NamedTemporaryFile()
    urls = [
        '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl(),
        'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
        'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
        'https://cms-gwmsmon.cern.ch/poolview/json/totals',
        'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
        'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
        'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
        '%s/storageoverview/latest/StorageOverview.json' % monitoringUrl(),
        '%s/stuck_1.json' % stucktransferUrl(),
        '%s/stuck_2.json' % stucktransferUrl(),
        '%s/stuck_m1.json' % stucktransferUrl(),
        '%s/stuck_m3.json' % stucktransferUrl(),
        '%s/stuck_m4.json' % stucktransferUrl(),
        '%s/stuck_m5.json' % stucktransferUrl(),
        '%s/stuck_m6.json' % stucktransferUrl(),
    ]
    cookie = {}
    ssbids = ['106', '107', '108', '109', '136', '158', '159', '160', '237']
    sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
    for url in urls:
        if 'gwmsmon' in url:
            cern_sso_cookie(url, tfile.name, cert(), ckey())
            cookie.update({url: tfile.name})
    gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
    siteInfo = {}
    for row in gen:
        if 'Detox' in row['url']:
            data = row['data']
        else:
            try:
                data = json.loads(row['data'])
            except Exception:
                traceback.print_exc()
                print(row)
                data = {}
        if 'ssb' in row['url']:
            for ssbid in ssbids:
                if ssbid in row['url']:
                    siteInfo['ssb_%s' % ssbid] = data
        elif 'prodview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_prod_site_summary'] = data
        elif 'totalview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_site_summary'] = data
        elif 'totals' in row['url']:
            siteInfo['gwmsmon_totals'] = data
        elif 'maxusedcpus' in row['url']:
            siteInfo['gwmsmon_prod_maxused'] = data
        elif 'mcore' in row['url']:
            siteInfo['mcore'] = data
        elif 'Detox' in row['url']:
            siteInfo['detox_sites'] = data
        elif 'monitoring' in row['url']:
            siteInfo['mss_usage'] = data
        elif 'stuck' in row['url']:
            for sid in sids:
                if sid in row['url']:
                    siteInfo['stuck_%s' % sid] = data
    siteInfo['site_queues'] = getNodeQueues()
    siteInfo['ready_in_agent'] = agentsSites(agentInfoUrl())
    return siteInfo
def fetch(self):
    "Fetch information about sites from various CMS data-services"
    tfile = tempfile.NamedTemporaryFile()
    urls = [
        '%s/getplotdata?columnid=106&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=107&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=108&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=109&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=136&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=158&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=159&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=160&batch=1&lastdata=1' % dashboardUrl(),
        '%s/getplotdata?columnid=237&batch=1&lastdata=1' % dashboardUrl(),
        ### FIXME: these calls to gwmsmon are failing pretty badly with
        ### "302 Found" and failing to decode, causing a huge error dump
        ### to the logs
        # 'https://cms-gwmsmon.cern.ch/totalview/json/site_summary',
        # 'https://cms-gwmsmon.cern.ch/prodview/json/site_summary',
        # 'https://cms-gwmsmon.cern.ch/poolview/json/totals',
        # 'https://cms-gwmsmon.cern.ch/prodview/json/maxusedcpus',
        'http://cmsgwms-frontend-global.cern.ch/vofrontend/stage/mcore_siteinfo.json',
        'http://t3serv001.mit.edu/~cmsprod/IntelROCCS/Detox/SitesInfo.txt',
        '%s/storageoverview/latest/StorageOverview.json' % monitoringUrl(),
    ]
    cookie = {}
    ssbids = ['106', '107', '108', '109', '136', '158', '159', '160', '237']
    sids = ['1', '2', 'm1', 'm3', 'm4', 'm5', 'm6']
    for url in urls:
        if 'gwmsmon' in url:
            cern_sso_cookie(url, tfile.name, cert(), ckey())
            cookie.update({url: tfile.name})
    gen = multi_getdata(urls, ckey(), cert(), cookie=cookie)
    siteInfo = {}
    for row in gen:
        if 'Detox' in row['url']:
            data = row['data']
        else:
            try:
                data = json.loads(row['data'])
            except Exception:
                traceback.print_exc()
                print(row)
                data = {}
        if 'ssb' in row['url']:
            for ssbid in ssbids:
                if ssbid in row['url']:
                    siteInfo['ssb_%s' % ssbid] = data
        elif 'prodview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_prod_site_summary'] = data
        elif 'totalview/json/site_summary' in row['url']:
            siteInfo['gwmsmon_site_summary'] = data
        elif 'totals' in row['url']:
            siteInfo['gwmsmon_totals'] = data
        elif 'maxusedcpus' in row['url']:
            siteInfo['gwmsmon_prod_maxused'] = data
        elif 'mcore' in row['url']:
            siteInfo['mcore'] = data
        elif 'Detox' in row['url']:
            siteInfo['detox_sites'] = data
        elif 'monitoring' in row['url']:
            siteInfo['mss_usage'] = data
        elif 'stuck' in row['url']:
            for sid in sids:
                if sid in row['url']:
                    siteInfo['stuck_%s' % sid] = data
    siteInfo['site_queues'] = getNodeQueues()
    siteInfo['ready_in_agent'] = agentsSites(agentInfoUrl())
    return siteInfo
def listReplicationRules(containers, rucioAccount, grouping, rucioUrl, rucioToken, scope="cms"):
    """
    List all the replication rules for the input filters provided.
    It builds a dictionary of container name and the locations where they
    have a rule locking data on, with some additional rule state logic in the code.
    :param containers: list of container names
    :param rucioAccount: string with the rucio account
    :param grouping: rule grouping string, only "A" or "D" are allowed
    :param rucioUrl: string with the Rucio url
    :param rucioToken: string with the Rucio token
    :param scope: string with the data scope
    :return: a flat dictionary key'ed by the container name, with a list of
        RSE expressions that still need to be resolved
    NOTE: Value `None` is returned in case the data-service failed to serve a given request.
    """
    locationByContainer = {}
    if not containers:
        return locationByContainer
    if grouping not in ["A", "D"]:
        raise RuntimeError("Replication rule grouping value provided ({}) is not allowed!".format(grouping))
    headers = {"X-Rucio-Auth-Token": rucioToken}
    urls = []
    for cont in containers:
        urls.append('{}/rules/?scope={}&account={}&grouping={}&name={}'.format(rucioUrl, scope, rucioAccount,
                                                                               grouping, quote(cont, safe="")))
    logging.info("Executing %d requests against Rucio to list replication rules", len(urls))
    data = multi_getdata(urls, ckey(), cert(), headers=headers)
    for row in data:
        container = unquote(row['url'].split("name=")[1])
        if "200 OK" not in row['headers']:
            msg = "Failure in listReplicationRules for container {}. Response: {}".format(container, row)
            logging.error(msg)
            locationByContainer.setdefault(container, None)
            continue
        try:
            locationByContainer.setdefault(container, [])
            for item in parseNewLineJson(row['data']):
                if item['state'] in ["U", "SUSPENDED"]:
                    logging.warning("Container %s has a SUSPENDED rule. Skipping rule: %s", container, item)
                    continue
                elif item['state'] in ["S", "STUCK"]:
                    if item['error'] == 'NO_SOURCES:NO_SOURCES':
                        msg = "Container {} has a STUCK rule with NO_SOURCES.".format(container)
                        msg += " Data could be lost forever... Rule info is: {}".format(item)
                        logging.warning(msg)
                        continue
                    timeDiff = item['stuck_at'] - item['created_at']
                    if int(timeDiff.days) > STUCK_LIMIT:
                        msg = "Container {} has a STUCK rule for {} days (limit set to: {}).".format(container,
                                                                                                     timeDiff.days,
                                                                                                     STUCK_LIMIT)
                        msg += " Not going to use it! Rule info: {}".format(item)
                        logging.warning(msg)
                        continue
                    else:
                        msg = "Container {} has a STUCK rule for only {} days.".format(container, timeDiff.days)
                        msg += " Considering it for the pileup location"
                        logging.info(msg)
                else:
                    logging.info("Container %s has rule ID %s in state %s, using it.", container, item['id'], item['state'])
                ### NOTE: this is not an RSE name, but an RSE expression that still needs to be resolved
                locationByContainer[container].append(item['rse_expression'])
        except Exception as exc:
            msg = "listReplicationRules function did not return a valid response for container: %s."
            msg += "Server responded with: %s\nError: %s"
            logging.exception(msg, container, str(exc), row['data'])
            locationByContainer.setdefault(container, None)
            continue
    return locationByContainer
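# Usage sketch (hypothetical values, not part of the original code). Note
# that the returned values are RSE *expressions*, which still need to be
# resolved into concrete RSE names.
def _examplePileupRules():
    token = "dummy-rucio-token"  # assumption: a valid token is obtained elsewhere
    rules = listReplicationRules(["/Prim/Proc-v1/PREMIX"], "wmcore_pileup", "A",
                                 "http://cms-rucio.cern.ch", token)
    for cont, rseExprs in rules.items():
        if rseExprs is None:
            print("Failed to list rules for container: %s" % cont)
        else:
            print("%s locked under RSE expressions: %s" % (cont, rseExprs))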