def getNativeRucioClient(config=None, logger=None):
    """
    instantiates a Rucio python Client for use in CRAB TaskWorker
    :param config: a TaskWorker configuration object in which at least the variables used below are defined
    :param logger: a valid logger instance
    :return: a Rucio Client object
    """
    logger.info("Initializing native Rucio client")
    from rucio.client import Client
    # the TaskWorker host certificate/key pair is used for x509 authentication
    nativeClient = Client(
        rucio_host=config.Services.Rucio_host,
        auth_host=config.Services.Rucio_authUrl,
        ca_cert=config.Services.Rucio_caPath,
        account=config.Services.Rucio_account,
        creds={"client_cert": config.TaskWorker.cmscert, "client_key": config.TaskWorker.cmskey},
        auth_type='x509')
    # sanity checks: verify the server is reachable and the account is usable
    ret = nativeClient.ping()
    logger.info("Rucio server v.%s contacted", ret['version'])
    ret = nativeClient.whoami()
    logger.info("Rucio client initialized for %s in status %s", ret['account'], ret['status'])
    return nativeClient
def __init__(self, acct, hostUrl=None, authUrl=None, configDict=None):
    """
    Constructs a Rucio object with the Client object embedded. In order to
    instantiate a Rucio Client object, it assumes the host has a proper rucio
    configuration file, where the default host and authentication host URL come
    from, as well as the X509 certificate information.
    :param acct: rucio account to be used
    :param hostUrl: defaults to the rucio config one
    :param authUrl: defaults to the rucio config one
    :param configDict: dictionary with extra parameters
    """
    configDict = configDict or {}
    # copy so the caller's dict is not mutated when defaults are filled in
    params = configDict.copy()
    params.setdefault('ca_cert', None)
    params.setdefault('auth_type', None)
    params.setdefault('creds', None)
    params.setdefault('timeout', 600)
    params.setdefault('user_agent', 'wmcore-client')
    self.logger = params.get("logger", logging.getLogger())
    # yield output compatible with the PhEDEx service class
    self.phedexCompat = params.get("phedexCompatible", True)
    msg = "WMCore Rucio initialization with acct: %s, host: %s, auth: %s" % (acct, hostUrl, authUrl)
    msg += " and these extra parameters: %s" % params
    self.logger.info(msg)
    self.cli = Client(rucio_host=hostUrl, auth_host=authUrl, account=acct,
                      ca_cert=params['ca_cert'], auth_type=params['auth_type'],
                      creds=params['creds'], timeout=params['timeout'],
                      user_agent=params['user_agent'])
    # echo back the effective client configuration for debugging
    clientParams = {}
    for k in ("host", "auth_host", "auth_type", "account", "user_agent",
              "ca_cert", "creds", "timeout", "request_retries"):
        clientParams[k] = getattr(self.cli, k)
    self.logger.info("Rucio client initialization with: %s", clientParams)
class aCTRucioMonitor(aCTLDMXProcess):
    """Monitor process publishing Rucio RSE usage as Prometheus metrics."""

    def __init__(self):
        aCTLDMXProcess.__init__(self)
        self.rucio = Client()
        # port 0 (or a missing config value) disables the Prometheus endpoint
        self.rucio_prometheus_port = int(
            self.arcconf.get(['monitor', 'rucioprometheusport']) or 0)
        if self.rucio_prometheus_port:
            start_http_server(self.rucio_prometheus_port)
            self.collector = aCTRucioCollector()
            REGISTRY.register(self.collector)
        else:
            self.log.info('Prometheus monitoring not enabled')

    def process(self):
        '''Actual metric gathering from Rucio is done at a low frequency here'''
        if not self.rucio_prometheus_port:
            return
        rses = self.rucio.list_rses()
        metrics = {}
        for rse in rses:
            # 'rucio' source: usage as accounted by Rucio itself
            info = self.rucio.get_rse_usage(rse['rse'], filters={'source': 'rucio'})
            metrics[rse['rse']] = next(info)
        self.collector.metrics = metrics
        # low frequency: wait two minutes between collection rounds
        time.sleep(120)
def registerFilesInDataset(self, idMap):
    """Attach files (with metadata) to datasets and register them in Rucio.

    :param idMap: {rse: {datasetName: [fileAttrDict, ...]}} mapping
    :return: result of RucioClient.add_files_to_datasets
    """
    attachmentList = []
    # loop over all rse; items() works on py2 and py3 (iteritems is py2-only)
    for rse, tmpMap in idMap.items():
        # loop over all datasets
        for datasetName, fileList in tmpMap.items():
            # extract scope from dataset
            scope, dsn = self.extract_scope(datasetName)
            files = []
            for tmpFile in fileList:
                # extract scope from LFN if available
                if 'name' in tmpFile:
                    lfn = tmpFile['name']
                else:
                    lfn = tmpFile['lfn']
                if ':' in lfn:
                    s, lfn = lfn.split(':')
                else:
                    s = scope
                # collect optional metadata
                meta = {}
                for key in ('guid', 'events', 'lumiblocknr', 'panda_id', 'campaign'):
                    if key in tmpFile:
                        meta[key] = tmpFile[key]
                if 'bytes' in tmpFile:
                    fsize = tmpFile['bytes']
                else:
                    fsize = tmpFile['size']
                # set mandatory fields ('fileData' avoids shadowing the builtin 'file')
                fileData = {'scope': s,
                            'name': lfn,
                            'bytes': fsize,
                            'meta': meta}
                if 'checksum' in tmpFile:
                    checksum = tmpFile['checksum']
                    if checksum.startswith('md5:'):
                        fileData['md5'] = checksum[4:]
                    elif checksum.startswith('ad:'):
                        fileData['adler32'] = checksum[3:]
                if 'surl' in tmpFile:
                    fileData['pfn'] = tmpFile['surl']
                files.append(fileData)
            # add attachment; the RSE is optional
            attachment = {'scope': scope,
                          'name': dsn,
                          'dids': files}
            if rse is not None:
                attachment['rse'] = rse
            attachmentList.append(attachment)
    # add files
    client = RucioClient()
    return client.add_files_to_datasets(attachmentList, ignore_duplicate=True)
def __init__(self): self.default_quota = (10 ** 6) # 1 MB for testing self.client = Client() self.CRIC_USERS_API = 'https://cms-cric.cern.ch/api/accounts/user/query/list/?json' with open('config_institute_policy.json') as policy_file: self._policy = json.load(policy_file)
def modify_protocol(args):
    """Switch the wan-domain setting of matching protocols on an RSE.

    :param args: parsed CLI args with .rse (list), .scheme (list) and the
                 mutually exclusive .wan_all / .wan_read flags
    :raises RuntimeError: if the Rucio server rejects an update
    """
    from rucio.client import Client
    client = Client(account="transfer_ops")
    changed = []
    for protocol in client.get_protocols(args.rse[0]):
        if protocol['scheme'] not in args.scheme:
            continue
        domains = protocol['domains']
        if args.wan_all:
            if domains['wan'] == DOMAIN_ALL:
                continue  # already at the desired setting, skip the update
            changed.append(protocol)
            domains['wan'] = DOMAIN_ALL
        elif args.wan_read:
            if domains['wan'] == DOMAIN_READ:
                continue
            changed.append(protocol)
            domains['wan'] = DOMAIN_READ
        # NOTE: when neither flag is set the (unchanged) protocol is still
        # pushed back to the server, matching the original behavior
        ok = client.update_protocols(
            args.rse[0],
            protocol['scheme'],
            {"domains": domains},
            hostname=protocol['hostname'],
            port=protocol['port'],
        )
        if not ok:
            raise RuntimeError("Failed to update protocol")
    if changed:  # truthiness instead of len(...)
        print("Successfully changed protocols")
    else:
        print("No protocols were modified")
def registerFilesInDataset(self, idMap, filesWoRSEs=None):
    """Attach files to datasets, splitting each attachment by whether the
    file is known at the RSE.

    :param idMap: {rse: {datasetName: [fileAttrDict, ...]}} mapping
    :param filesWoRSEs: optional list of LFNs to register without an RSE
    :return: result of RucioClient.add_files_to_datasets
    """
    attachmentList = []
    # loop over all rse; items() works on py2 and py3 (iteritems is py2-only)
    for rse, tmpMap in idMap.items():
        # loop over all datasets
        for datasetName, fileList in tmpMap.items():
            # extract scope from dataset
            scope, dsn = self.extract_scope(datasetName)
            filesWithRSE = []
            filesWoRSE = []
            for tmpFile in fileList:
                # convert file attribute ('fileData' avoids shadowing the builtin 'file')
                fileData = self.convFileAttr(tmpFile, scope)
                if rse is not None and (filesWoRSEs is None or fileData['name'] not in filesWoRSEs):
                    filesWithRSE.append(fileData)
                else:
                    # a PFN makes no sense without an RSE
                    if 'pfn' in fileData:
                        del fileData['pfn']
                    filesWoRSE.append(fileData)
            # add attachments
            if len(filesWithRSE) > 0:
                attachmentList.append({'scope': scope, 'name': dsn,
                                       'dids': filesWithRSE, 'rse': rse})
            if len(filesWoRSE) > 0:
                attachmentList.append({'scope': scope, 'name': dsn,
                                       'dids': filesWoRSE})
    # add files
    client = RucioClient()
    return client.add_files_to_datasets(attachmentList, ignore_duplicate=True)
def registerDatasetLocation(self, dsn, rses, lifetime=None, owner=None):
    """Create (or confirm) a replication rule pinning a dataset to a set of RSEs.

    :param dsn: dataset name, possibly scope-qualified
    :param rses: list of RSE names, joined with '|' into one expression
    :param lifetime: rule lifetime in days (converted to seconds)
    :param owner: rule account; defaults to the client account
    :return: True
    """
    if lifetime is not None:
        lifetime = lifetime * 24 * 60 * 60  # days -> seconds
    scope, dsn = self.extract_scope(dsn)
    dids = [{'scope': scope, 'name': dsn}]
    # make location expression; sorted so the same RSE set always compares equal
    rses.sort()
    location = '|'.join(rses)
    client = RucioClient()
    if owner is None:
        owner = client.account
    # check if a replication rule already exists; compare against owner
    # (not client.account) so an explicitly passed owner is honoured,
    # consistent with the extended variant of this method
    for rule in client.list_did_rules(scope=scope, name=dsn):
        if rule['rse_expression'] == location and rule['account'] == owner:
            return True
    try:
        client.add_replication_rule(dids=dids, copies=1, rse_expression=location,
                                    weight=None, lifetime=lifetime, grouping='DATASET',
                                    account=owner, locked=False, notify='N',
                                    ignore_availability=True)
    except Duplicate:
        pass
    return True
def listDatasets(self, datasetName, old=False):
    """List datasets/containers matching a name, with DQ2-style synthetic vuids.

    :param datasetName: name pattern; a trailing '/' selects containers
    :param old: if True, key results by bare name instead of scope:name
    :return: ({keyName: {'duid':..., 'vuids':[...]}}, '') or (None, error string)
    """
    result = {}
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    if dsn.endswith('/'):
        dsn = dsn[:-1]
        collection = 'container'
    else:
        collection = 'dataset'
    filters = {'name': dsn}
    try:
        # get dids
        client = RucioClient()
        for name in client.list_dids(scope, filters, type=collection):
            # md5 requires bytes on py3; encode() is a no-op for ascii on py2
            vuid = hashlib.md5((scope + ':' + name).encode('utf-8')).hexdigest()
            vuid = '%s-%s-%s-%s-%s' % (vuid[0:8], vuid[8:12], vuid[12:16],
                                       vuid[16:20], vuid[20:32])
            duid = vuid
            # add trailing slash for containers
            if datasetName.endswith('/') and not name.endswith('/'):
                name += '/'
            if old or ':' not in datasetName:
                keyName = name
            else:
                keyName = str('%s:%s' % (scope, name))
            if keyName not in result:
                result[keyName] = {'duid': duid, 'vuids': [vuid]}
        return result, ''
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return None, '%s %s' % (errType, errVale)
def registerZipFiles(self, zipMap):
    """Register zip archive replicas and attach their contents in chunks of 100.

    :param zipMap: {zipFileName: zipFileAttr} where zipFileAttr carries
                   'scope', 'rse' (list) and 'files' (content attributes)
    """
    # no zip files
    if len(zipMap) == 0:
        return
    client = RucioClient()
    # loop over all zip files; items() works on py2 and py3 (iteritems is py2-only)
    for zipFileName, zipFileAttr in zipMap.items():
        # convert file attribute
        zipFile = self.convFileAttr(zipFileAttr, zipFileAttr['scope'])
        # loop over all contents
        files = []
        for conFileAttr in zipFileAttr['files']:
            # get scope
            scope, dsn = self.extract_scope(conFileAttr['ds'])
            # convert file attribute
            conFile = self.convFileAttr(conFileAttr, scope)
            conFile['type'] = 'FILE'
            if 'pfn' in conFile:
                del conFile['pfn']
            files.append(conFile)
        # register zip file replica at each RSE
        for rse in zipFileAttr['rse']:
            client.add_replicas(rse=rse, files=[zipFile])
        # attach contents in chunks of 100 to keep requests small
        nFiles = 100
        iFiles = 0
        while iFiles < len(files):
            client.add_files_to_archive(scope=zipFile['scope'], name=zipFile['name'],
                                        files=files[iFiles:iFiles + nFiles])
            iFiles += nFiles
def listFileReplicas(self, scopes, lfns, rses=None):
    """List RSEs holding replicas of the given files, queried in chunks of 1000.

    :param scopes: list of scopes, parallel to lfns
    :param lfns: list of LFNs
    :param rses: optional whitelist of RSEs to keep
    :return: (True, {lfn: [rse, ...]}) or (False, error string)
    """
    try:
        client = RucioClient()
        dids = []
        iGUID = 0
        nGUID = 1000  # query chunk size
        retVal = {}
        for scope, lfn in zip(scopes, lfns):
            iGUID += 1
            dids.append({'scope': scope, 'name': lfn})
            # flush a full chunk, or the remainder on the last file
            if len(dids) % nGUID == 0 or iGUID == len(lfns):
                for tmpDict in client.list_replicas(dids):
                    tmpLFN = str(tmpDict['name'])
                    # materialize: py3 dict views don't support filtering in place
                    tmpRses = list(tmpDict['rses'])
                    # RSE selection
                    if rses is not None:
                        tmpRses = [tmpRse for tmpRse in tmpRses if tmpRse in rses]
                    if len(tmpRses) > 0:
                        retVal[tmpLFN] = tmpRses
                dids = []
        return True, retVal
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
def getRseUsage(self, rse, src='srm'):
    """Return {'total', 'used', 'free'} in GB for one usage source of an RSE.

    Best-effort: any failure yields {} and missing fields become None.
    """
    retMap = {}
    try:
        client = RucioClient()
        itr = client.get_rse_usage(rse)

        def toGB(value):
            # mirror the old per-field behavior: any error -> None
            try:
                return value / 1024 / 1024 / 1024
            except Exception:
                return None

        # look for the requested source (srm by default)
        for item in itr:
            if item['source'] == src:
                retMap = {'total': toGB(item.get('total')),
                          'used': toGB(item.get('used')),
                          'free': toGB(item.get('free'))}
                break
    except Exception:  # narrowed from bare except; still best-effort
        pass
    return retMap
def modify_protocol(args):
    """Set individual wan-domain operations (read/write/tpc/delete) on matching protocols.

    :param args: parsed CLI args with .rse (list), .scheme (list) and optional
                 .wan_read / .wan_write / .wan_tpc / .wan_delete priority values
    :raises RuntimeError: if the Rucio server rejects an update
    """
    from rucio.client import Client
    client = Client(account="transfer_ops")
    changed = []
    for protocol in client.get_protocols(args.rse[0]):
        if protocol['scheme'] not in args.scheme:
            continue
        domains = protocol['domains']
        # apply every flag the caller supplied; a protocol may be appended
        # more than once, 'changed' is only used as a boolean below
        if args.wan_read is not None:
            changed.append(protocol)
            domains['wan']['read'] = args.wan_read
        if args.wan_write is not None:
            changed.append(protocol)
            domains['wan']['write'] = args.wan_write
        if args.wan_tpc is not None:
            changed.append(protocol)
            domains['wan']['third_party_copy'] = args.wan_tpc
        if args.wan_delete is not None:
            changed.append(protocol)
            domains['wan']['delete'] = args.wan_delete
        ok = client.update_protocols(
            args.rse[0],
            protocol['scheme'],
            {"domains": domains},
            hostname=protocol['hostname'],
            port=protocol['port'],
        )
        if not ok:
            raise RuntimeError("Failed to update protocol")
    if changed:  # truthiness instead of len(...)
        print("Successfully changed protocols")
    else:
        print("No protocols were modified")
def listFileReplicas(self, scopes, lfns, rses=None):
    """List RSEs holding srm replicas of the given files (chunks of 1000).

    :param scopes: list of scopes, parallel to lfns
    :param lfns: list of LFNs
    :param rses: optional whitelist of RSEs to keep
    :return: (True, {lfn: [rse, ...]}) or (False, error string)
    """
    try:
        client = RucioClient()
        dids = []
        iGUID = 0
        nGUID = 1000  # query chunk size
        retVal = {}
        for scope, lfn in zip(scopes, lfns):
            iGUID += 1
            dids.append({'scope': scope, 'name': lfn})
            # flush a full chunk, or the remainder on the last file
            if len(dids) % nGUID == 0 or iGUID == len(lfns):
                # restrict to the srm scheme, as in the original
                for tmpDict in client.list_replicas(dids, ['srm']):
                    tmpLFN = str(tmpDict['name'])
                    # materialize: py3 dict views don't support filtering in place
                    tmpRses = list(tmpDict['rses'])
                    # RSE selection
                    if rses is not None:
                        tmpRses = [tmpRse for tmpRse in tmpRses if tmpRse in rses]
                    if len(tmpRses) > 0:
                        retVal[tmpLFN] = tmpRses
                dids = []
        return True, retVal
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
def registerDatasetLocation(self, dsn, rses, lifetime=None, owner=None, activity=None,
                            scope=None, asynchronous=False, grouping='DATASET', notify='N'):
    """Create (or confirm) a replication rule pinning a dataset to a set of RSEs.

    :param dsn: dataset name; scope extracted unless *scope* is given
    :param rses: list of RSE names, joined with '|' into one expression
    :param lifetime: rule lifetime in days (converted to seconds)
    :param owner: rule account; defaults to the client account
    :param activity: rule activity passed through to Rucio
    :param scope: optional explicit scope overriding extraction
    :param asynchronous: accepted for interface compatibility (currently unused)
    :param grouping: rule grouping; None falls back to 'DATASET'
    :param notify: rule notification setting
    :return: True
    """
    if grouping is None:
        grouping = 'DATASET'
    presetScope = scope
    if lifetime is not None:  # 'is not None' instead of '!= None'
        lifetime = lifetime * 24 * 60 * 60  # days -> seconds
    scope, dsn = self.extract_scope(dsn)
    if presetScope is not None:
        scope = presetScope
    dids = [{'scope': scope, 'name': dsn}]
    # make location expression; sorted so the same RSE set always compares equal
    rses.sort()
    location = '|'.join(rses)
    client = RucioClient()
    if owner is None:
        owner = client.account
    # check if a replication rule already exists for this owner
    for rule in client.list_did_rules(scope=scope, name=dsn):
        if rule['rse_expression'] == location and rule['account'] == owner:
            return True
    try:
        client.add_replication_rule(dids=dids, copies=1, rse_expression=location,
                                    weight=None, lifetime=lifetime, grouping=grouping,
                                    account=owner, locked=False, activity=activity,
                                    notify=notify, ignore_availability=True)
    except (Duplicate, DuplicateRule):
        pass
    return True
def listFilesInDataset(self, datasetName, long=False, fileList=None):
    """Return DQ2-style attributes of the files in a dataset.

    :param datasetName: dataset (a trailing '/' is stripped)
    :param long: if True, also include the lumi block number
    :param fileList: optional filter of LFNs (attempt-number suffix tolerated)
    :return: ({lfn: attrs}, None)
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    if dsn.endswith('/'):
        dsn = dsn[:-1]
    client = RucioClient()
    return_dict = {}
    for x in client.list_files(scope, dsn, long=long):
        tmpLFN = str(x['name'])
        if fileList is not None:  # 'is not None' instead of '!= None'
            # also accept the generic LFN without a numeric attempt suffix
            genLFN = re.sub(r'\.\d+$', '', tmpLFN)  # raw string for the regex
            if tmpLFN not in fileList and genLFN not in fileList:
                continue
        dq2attrs = {}
        dq2attrs['chksum'] = "ad:" + str(x['adler32'])
        dq2attrs['md5sum'] = dq2attrs['chksum']
        dq2attrs['checksum'] = dq2attrs['chksum']
        dq2attrs['fsize'] = x['bytes']
        dq2attrs['filesize'] = dq2attrs['fsize']
        dq2attrs['scope'] = str(x['scope'])
        dq2attrs['events'] = str(x['events'])
        if long:
            dq2attrs['lumiblocknr'] = str(x['lumiblocknr'])
        # reformat the raw hex guid into the dashed uuid form
        guid = str('%s-%s-%s-%s-%s' % (x['guid'][0:8], x['guid'][8:12], x['guid'][12:16],
                                       x['guid'][16:20], x['guid'][20:32]))
        dq2attrs['guid'] = guid
        return_dict[tmpLFN] = dq2attrs
    return (return_dict, None)
def getZipFiles(self, dids, rses):
    """Look up metadata of the zip archives that hold the given DIDs.

    :param dids: list of 'scope:lfn' strings, queried in chunks of 1000
    :param rses: optional whitelist of RSEs (None accepts any)
    :return: (True, {did: zip metadata}) or (False, error string)
    """
    try:
        client = RucioClient()
        data = []
        iGUID = 0
        nGUID = 1000  # query chunk size
        retVal = {}
        for did in dids:
            iGUID += 1
            scope, lfn = did.split(':')
            data.append({'scope': scope, 'name': lfn})
            # flush a full chunk, or the remainder on the last DID
            if len(data) % nGUID == 0 or iGUID == len(dids):
                for tmpDict in client.list_replicas(data):
                    tmpScope = str(tmpDict['scope'])
                    tmpLFN = str(tmpDict['name'])
                    tmpDID = '{0}:{1}'.format(tmpScope, tmpLFN)
                    # RSE selection; items() works on py2 and py3 (iteritems is py2-only)
                    for pfn, pfnData in tmpDict['pfns'].items():
                        if (rses is None or pfnData['rse'] in rses) and pfnData['domain'] == 'zip':
                            # strip any query string from the PFN to get the zip name
                            zipFileName = pfn.split('/')[-1]
                            zipFileName = re.sub(r'\?.+$', '', zipFileName)
                            retVal[tmpDID] = client.get_metadata(tmpScope, zipFileName)
                            break
                data = []
        return True, retVal
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
def listDatasets(self, datasetName, old=False):
    """List datasets/containers matching a name, with DQ2-style synthetic vuids.

    :param datasetName: name pattern; a trailing '/' selects containers
    :param old: if True, key results by bare name instead of scope:name
    :return: ({keyName: {'duid':..., 'vuids':[...]}}, '') or (None, error string)
    """
    result = {}
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    if dsn.endswith('/'):
        dsn = dsn[:-1]
        collection = 'container'
    else:
        collection = 'dataset'
    filters = {'name': dsn}
    try:
        # get dids
        client = RucioClient()
        for name in client.list_dids(scope, filters, type=collection):
            # md5 requires bytes on py3; encode() is a no-op for ascii on py2
            vuid = hashlib.md5((scope + ':' + name).encode('utf-8')).hexdigest()
            vuid = '%s-%s-%s-%s-%s' % (vuid[0:8], vuid[8:12], vuid[12:16],
                                       vuid[16:20], vuid[20:32])
            duid = vuid
            # add trailing slash for containers
            if datasetName.endswith('/') and not name.endswith('/'):
                name += '/'
            if old or ':' not in datasetName:
                keyName = name
            else:
                keyName = str('%s:%s' % (scope, name))
            if keyName not in result:
                result[keyName] = {'duid': duid, 'vuids': [vuid]}
        return result, ''
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return None, '%s %s' % (errType, errVale)
def listDatasetsByGUIDs(self, guids):
    """Map each GUID to the list of 'scope:name' datasets that contain it."""
    client = RucioClient()
    result = {}
    for guid in guids:
        matches = client.get_dataset_by_guid(guid)
        result[guid] = [str('%s:%s' % (entry['scope'], entry['name'])) for entry in matches]
    return result
def getRseUsage(self, rse, src='srm'):
    """Return {'total', 'used', 'free'} in GB for one usage source of an RSE.

    Best-effort: any failure yields {} and missing fields become None.
    """
    retMap = {}
    try:
        client = RucioClient()
        itr = client.get_rse_usage(rse)

        def toGB(value):
            # mirror the old per-field behavior: any error -> None
            try:
                return value / 1024 / 1024 / 1024
            except Exception:
                return None

        # look for the requested source (srm by default)
        for item in itr:
            if item['source'] == src:
                retMap = {'total': toGB(item.get('total')),
                          'used': toGB(item.get('used')),
                          'free': toGB(item.get('free'))}
                break
    except Exception:  # narrowed from bare except; still best-effort
        pass
    return retMap
def resolve_replicas(self, files, protocols):
    """Resolve input DDM endpoints for each file and query Rucio for replicas.

    :param files: file specs with .ddmendpoint, .scope and .lfn attributes
    :param protocols: accepted for interface compatibility
    :raises PilotException: if the Rucio replica lookup fails
    """
    # build list of local ddmendpoints: group by site
    # load ALL ddmconf
    self.ddmconf.update(self.si.resolveDDMConf([]))
    ddms = {}
    # items() works on py2 and py3 (iteritems is py2-only)
    for ddm, dat in self.ddmconf.items():
        ddms.setdefault(dat['site'], []).append(dat)
    for fdat in files:
        # build and order list of local ddms
        ddmdat = self.ddmconf.get(fdat.ddmendpoint)
        if not ddmdat:
            raise Exception("Failed to resolve ddmendpoint by name=%s send by Panda job, please check configuration. fdat=%s" % (fdat.ddmendpoint, fdat))
        if not ddmdat['site']:
            # use self.ddmconf: a bare `ddmconf` is undefined here and raised NameError
            raise Exception("Failed to resolve site name of ddmendpoint=%s. please check ddm declaration: ddmconf=%s ... fdat=%s" % (fdat.ddmendpoint, self.ddmconf, fdat))
        localddms = ddms.get(ddmdat['site'])
        # sort/filter ddms (as possible input source)
        fdat.inputddms = self._prepare_input_ddm(ddmdat, localddms)
    # load replicas from Rucio
    from rucio.client import Client
    c = Client()
    dids = [dict(scope=e.scope, name=e.lfn) for e in files]
    schemes = ['srm', 'root', 'https', 'gsiftp']
    # Get the replica list
    try:
        replicas = c.list_replicas(dids, schemes=schemes)
    except Exception as e:  # 'as' syntax works on py2.6+ and py3
        raise PilotException("Failed to get replicas from Rucio: %s" % e, code=PilotErrors.ERR_FAILEDLFCGETREPS)
def listFilesInDataset(self, datasetName, long=False, fileList=None):
    """Return DQ2-style attributes of the files in a dataset.

    :param datasetName: dataset (a trailing '/' is stripped)
    :param long: if True, also include the lumi block number
    :param fileList: optional filter of LFNs (attempt-number suffix tolerated)
    :return: ({lfn: attrs}, None)
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    if dsn.endswith('/'):
        dsn = dsn[:-1]
    client = RucioClient()
    return_dict = {}
    for x in client.list_files(scope, dsn, long=long):
        tmpLFN = str(x['name'])
        if fileList is not None:  # 'is not None' instead of '!= None'
            # also accept the generic LFN without a numeric attempt suffix
            genLFN = re.sub(r'\.\d+$', '', tmpLFN)  # raw string for the regex
            if tmpLFN not in fileList and genLFN not in fileList:
                continue
        dq2attrs = {}
        dq2attrs['chksum'] = "ad:" + str(x['adler32'])
        dq2attrs['md5sum'] = dq2attrs['chksum']
        dq2attrs['checksum'] = dq2attrs['chksum']
        dq2attrs['fsize'] = x['bytes']
        dq2attrs['filesize'] = dq2attrs['fsize']
        dq2attrs['scope'] = str(x['scope'])
        dq2attrs['events'] = str(x['events'])
        if long:
            dq2attrs['lumiblocknr'] = str(x['lumiblocknr'])
        # reformat the raw hex guid into the dashed uuid form
        guid = str('%s-%s-%s-%s-%s' % (x['guid'][0:8], x['guid'][8:12], x['guid'][12:16],
                                       x['guid'][16:20], x['guid'][20:32]))
        dq2attrs['guid'] = guid
        return_dict[tmpLFN] = dq2attrs
    return (return_dict, None)
def registerZipFiles(self, zipMap):
    """Register zip archive replicas and attach their contents.

    :param zipMap: {zipFileName: zipFileAttr} where zipFileAttr carries
                   'scope', 'rse' (list) and 'files' (content attributes)
    """
    # no zip files
    if len(zipMap) == 0:
        return
    client = RucioClient()
    # loop over all zip files; items() works on py2 and py3 (iteritems is py2-only)
    for zipFileName, zipFileAttr in zipMap.items():
        # convert file attribute
        zipFile = self.convFileAttr(zipFileAttr, zipFileAttr['scope'])
        # loop over all contents
        files = []
        for conFileAttr in zipFileAttr['files']:
            # get scope
            scope, dsn = self.extract_scope(conFileAttr['ds'])
            # convert file attribute
            conFile = self.convFileAttr(conFileAttr, scope)
            conFile['type'] = 'FILE'
            if 'pfn' in conFile:
                del conFile['pfn']
            files.append(conFile)
        # register zip file replica at each RSE
        for rse in zipFileAttr['rse']:
            client.add_replicas(rse=rse, files=[zipFile])
        # attach all contents in one call (a sibling variant chunks by 100)
        client.add_files_to_archive(scope=zipFile['scope'], name=zipFile['name'],
                                    files=files)
def generate_desired(gridjobspec):
    """Build the desired k8s children (ConfigMaps + Jobs) for a grid job spec.

    Files of the input dataset are sliced into groups of nFilesPerJob; each
    slice gets one ConfigMap (job parameters) and one Job (from the template).

    :param gridjobspec: dict with 'inDS', 'taskid', 'exec_template', 'outputs',
                        'user', 'image' and optional 'nFilesPerJob'
    :return: (number of jobs, list of configmap/job dicts)
    """
    c = Client()
    scope, name = gridjobspec['inDS'].split(':', 1)
    nFilesPerJob = gridjobspec.get('nFilesPerJob', 3)
    files = sorted(list(c.list_files(scope, name)))
    log.warning('files %s', len(files))

    def chunks(l, n):
        # yield successive n-sized slices of l
        for i in range(0, len(l), n):
            yield l[i:i + n]

    filelists = list(chunks(files, nFilesPerJob))
    configmaps = []
    jobs = []
    for index, fl in enumerate(filelists):
        cmapname = 'task-{taskid}-{index}-config'.format(
            taskid=gridjobspec['taskid'], index=index)
        jobname = 'task-{taskid}-{index}-job'.format(
            taskid=gridjobspec['taskid'], index=index)
        namespace = 'default'
        jobconfig = {
            "dids": sorted([':'.join([x['scope'], x['name']]) for x in fl]),
            "exec_template": gridjobspec['exec_template'],
            "outputs": gridjobspec['outputs'],
            "taskid": gridjobspec['taskid'],
            "subjobid": index,
            "user": gridjobspec['user']
        }
        configmap = {
            'apiVersion': 'v1',
            'kind': 'ConfigMap',
            'metadata': {
                'name': cmapname,
                'namespace': namespace
            },
            'data': {
                'jobconfig.json': json.dumps(jobconfig, sort_keys=True)
            }
        }
        # load a fresh copy of the template per job (it is mutated below);
        # context manager closes the handle (the old code leaked one per job
        # and also loaded an unused copy before the loop)
        with open('slicejob_template.json') as template_file:
            job = json.load(template_file)
        job['metadata']['name'] = jobname
        job['metadata']['namespace'] = namespace
        job['spec']['template']['spec']['volumes'][0]['configMap'][
            'name'] = cmapname
        job['spec']['template']['spec']['initContainers'][1][
            'image'] = gridjobspec['image']
        configmaps.append(configmap)
        jobs.append(job)
    log.warning('jobs: %s cmaps: %s', len(jobs), len(configmaps))
    children = configmaps + jobs
    # sha1 requires bytes on py3
    log.warning('children hash %s',
                hashlib.sha1(json.dumps(children, sort_keys=True).encode('utf-8')).hexdigest())
    return len(jobs), children
def deleteFilesFromDataset(self, datasetName, files):
    """Detach the given DIDs from a dataset; a vanished dataset is not an error."""
    # resolve scope before talking to Rucio
    scope, dsn = self.extract_scope(datasetName)
    rucio = RucioClient()
    try:
        rucio.detach_dids(scope=scope, name=dsn, dids=files)
    except DataIdentifierNotFound:
        # dataset no longer exists -> nothing left to detach
        pass
def deleteFilesFromDataset(self, datasetName, files):
    """Detach *files* from the dataset named *datasetName*.

    A missing dataset is silently ignored.
    """
    client = RucioClient()
    scope, dsn = self.extract_scope(datasetName)
    try:
        client.detach_dids(scope=scope, name=dsn, dids=files)
    except DataIdentifierNotFound:
        pass
def closeDataset(self, dsn):
    """Close a dataset so no more files can be attached; always returns True."""
    client = RucioClient()
    try:
        scope, name = self.extract_scope(dsn)
        client.set_status(scope, name, open=False)
    except (UnsupportedOperation, DataIdentifierNotFound):
        # already closed or gone: nothing to do
        pass
    return True
def closeDataset(self, dsn):
    """Mark the dataset as closed.

    Errors from an already-closed or missing dataset are swallowed;
    the call always reports success.
    """
    rucio = RucioClient()
    try:
        extractedScope, extractedName = self.extract_scope(dsn)
        rucio.set_status(extractedScope, extractedName, open=False)
    except (UnsupportedOperation, DataIdentifierNotFound):
        pass
    return True
def __init__(self, account, auth_type=None, exclude=DEFAULT_EXCLUDE_LINKS,
             distance=None, rselist=None):
    """Build the RSE list and link/distance matrix for the given account.

    :param account: rucio account for the embedded Client
    :param auth_type: rucio authentication type (None: from config)
    :param exclude: rules describing links to exclude
    :param distance: distance rules; defaults to DEFAULT_DISTANCE_RULES
    :param rselist: optional explicit list of RSEs to consider
    """
    if distance is None:
        distance = DEFAULT_DISTANCE_RULES
    self.rcli = Client(account=account, auth_type=auth_type)
    # populate internal state via helpers defined elsewhere in the class
    self._get_rselist(rselist)
    self._get_matrix(distance, exclude)
def getMetaData(self, dsn):
    """Fetch dataset metadata.

    :return: (True, metadata dict), (True, None) if the dataset does not
             exist, or (False, error string) on any other failure
    """
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        return True, client.get_metadata(scope, dsn)
    except DataIdentifierNotFound:
        # missing dataset is not an error for callers
        return True, None
    except Exception:  # narrowed from bare except
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
def setMetaData(self, dsn, metadata=None):
    """Set metadata key/value pairs on a dataset.

    :param metadata: dict of metadata to set (one Rucio call per key)
    :return: (True, '') on success, (False, error string) otherwise
    """
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        # items() works on py2 and py3 (iteritems is py2-only)
        for tmpKey, tmpValue in metadata.items():
            client.set_metadata(scope, dsn, key=tmpKey, value=tmpValue)
    except Exception:  # narrowed from bare except
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
    return True, ''
def setMetaData(self, dsn, metadata=None):
    """Set metadata key/value pairs on a dataset, one Rucio call per key.

    :return: (True, '') on success, (False, error string) otherwise
    """
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        # items() works on py2 and py3 (iteritems is py2-only)
        for tmpKey, tmpValue in metadata.items():
            client.set_metadata(scope, dsn, key=tmpKey, value=tmpValue)
    except Exception:  # narrowed from bare except
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
    return True, ''
def getMetaData(self, dsn):
    """Fetch dataset metadata; a missing dataset yields (True, None).

    :return: (True, metadata dict) / (True, None) / (False, error string)
    """
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        return True, client.get_metadata(scope, dsn)
    except DataIdentifierNotFound:
        return True, None
    except Exception:  # narrowed from bare except
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
def __init__(self):
    """Prepare the Rucio environment for the DDM account and create the client."""
    try:
        # set up Rucio environment (x509 proxy authentication)
        os.environ['RUCIO_ACCOUNT'] = DDM_ACCOUNT_NAME
        os.environ['RUCIO_AUTH_TYPE'] = 'x509_proxy'
        os.environ['X509_USER_PROXY'] = self._get_proxy()
        self.ddm_client = Client()
    except CannotAuthenticate as ex:
        logger.critical('DDM: authentication failed: {0}'.format(str(ex)))
    except Exception as ex:
        # NOTE(review): failures are only logged; self.ddm_client stays unset,
        # so later attribute access would raise AttributeError — confirm intended
        logger.critical('DDM: initialization failed: {0}'.format(str(ex)))
def registerFiles(self, files, rse):
    """Register file replicas at an RSE and pin them with a one-copy rule.

    Pre-existing replicas and duplicate rules are silently accepted.
    """
    rucio = RucioClient()
    # register the replicas; tolerate files already known to Rucio
    try:
        rucio.add_replicas(files=files, rse=rse)
    except FileAlreadyExists:
        pass
    # protect them with a single-copy rule; tolerate an identical existing rule
    try:
        rucio.add_replication_rule(files, copies=1, rse_expression=rse)
    except DuplicateRule:
        pass
def __init__(self):
    """Set up the Rucio client and, if configured, the Prometheus endpoint."""
    aCTLDMXProcess.__init__(self)
    self.rucio = Client()
    # port 0 (or a missing config value) disables monitoring
    self.rucio_prometheus_port = int(
        self.arcconf.get(['monitor', 'rucioprometheusport']) or 0)
    if self.rucio_prometheus_port:
        start_http_server(self.rucio_prometheus_port)
        self.collector = aCTRucioCollector()
        REGISTRY.register(self.collector)
    else:
        self.log.info('Prometheus monitoring not enabled')
def registerFiles(self, files, rse):
    """Add file replicas on an RSE, then protect them with a single-copy rule.

    Already-known replicas and duplicate rules are tolerated.
    """
    rucioClient = RucioClient()
    try:
        rucioClient.add_replicas(files=files, rse=rse)
    except FileAlreadyExists:
        # replicas already registered: fine
        pass
    try:
        rucioClient.add_replication_rule(files, copies=1, rse_expression=rse)
    except DuplicateRule:
        # an identical rule already exists: fine
        pass
def eraseDataset(self, dsn, scope=None):
    """Schedule a dataset for deletion by setting a tiny lifetime.

    :param dsn: dataset name; scope extracted unless *scope* is given
    :return: (True, '') on success, (False, error string) otherwise
    """
    presetScope = scope
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        if presetScope is not None:
            scope = presetScope
        # tiny non-zero lifetime so the dataset expires almost immediately
        client.set_metadata(scope=scope, name=dsn, key='lifetime', value=0.0001)
    except Exception:  # narrowed from bare except
        errType, errVale = sys.exc_info()[:2]
        return False, '%s %s' % (errType, errVale)
    return True, ''
def registerFilesInDataset(self, idMap, filesWoRSEs=None):
    """Attach files to datasets in chunks of 100 DIDs per attachment.

    :param idMap: {rse: {datasetName: [fileAttrDict, ...]}} mapping
    :param filesWoRSEs: optional list of LFNs to register without an RSE
    :return: True
    """
    nChunk = 100  # max DIDs per attachment
    attachmentList = []
    # loop over all rse
    for rse in idMap:
        tmpMap = idMap[rse]
        # loop over all datasets
        for datasetName in tmpMap:
            fileList = tmpMap[datasetName]
            # extract scope from dataset
            scope, dsn = self.extract_scope(datasetName)
            filesWithRSE = []
            filesWoRSE = []
            for tmpFile in fileList:
                # convert file attribute ('fileData' avoids shadowing the builtin 'file')
                fileData = self.convFileAttr(tmpFile, scope)
                if rse is not None and (filesWoRSEs is None or fileData['name'] not in filesWoRSEs):
                    filesWithRSE.append(fileData)
                else:
                    # a PFN makes no sense without an RSE
                    if 'pfn' in fileData:
                        del fileData['pfn']
                    filesWoRSE.append(fileData)
            # add chunked attachments (range-step replaces the manual while loops)
            for iFiles in range(0, len(filesWithRSE), nChunk):
                attachmentList.append({
                    'scope': scope,
                    'name': dsn,
                    'dids': filesWithRSE[iFiles:iFiles + nChunk],
                    'rse': rse
                })
            for iFiles in range(0, len(filesWoRSE), nChunk):
                attachmentList.append({
                    'scope': scope,
                    'name': dsn,
                    'dids': filesWoRSE[iFiles:iFiles + nChunk]
                })
    # add files
    client = RucioClient()
    client.add_files_to_datasets(attachmentList, ignore_duplicate=True)
    return True
def submit_transfer_to_rucio(name, source_url, bytes, adler32):
    """Register a replica at NDGF-PREPROD and chain one-day replication rules
    pre-prod -> prod -> SNIC.

    :param name: file name (DID name within the hard-coded scope)
    :param source_url: PFN of the source replica
    :param bytes: file size (kept as-is: renaming would break callers,
                  though it shadows the builtin)
    :param adler32: adler32 checksum of the file
    """
    _LOGGER.info("Here")
    # transfer pre-prod -> prod -> snic
    rucio_client = Client()
    # TODO: scope should be extracted from the path: Top directory
    scope = 'functional_tests'
    try:
        replica = {
            'scope': scope,
            'name': name,
            'pfn': source_url,
            'bytes': int(bytes),
            'adler32': adler32
        }
        _LOGGER.debug('Register replica {}'.format(str(replica)))
        rse = 'NDGF-PREPROD'
        account = 'garvin'
        rucio_client.add_replicas(rse=rse, files=[replica])
        # rules chained via source_replica_expression, each with a 1-day lifetime
        kwargss = [{
            'rse_expression': 'NDGF-PREPROD',
            'lifetime': 86400
        }, {
            'rse_expression': 'NDGF',
            'source_replica_expression': 'NDGF-PREPROD',
            'lifetime': 86400
        }, {
            'rse_expression': 'SNIC',
            'source_replica_expression': 'NDGF',
            'lifetime': 86400
        }]
        for kwargs in kwargss:
            rule = rucio_client.add_replication_rule(dids=[{
                'scope': scope,
                'name': name
            }],
                                                     account=account,
                                                     copies=1,
                                                     grouping='NONE',
                                                     weight=None,
                                                     locked=False,
                                                     **kwargs)
            _LOGGER.info('Added rule for file to {}: {}'.format(kwargs, rule))
    except Exception:  # narrowed from bare except; best-effort, only logged
        _LOGGER.error(traceback.format_exc())
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None,
             dataset_proc=None):
    """Configure the CMS dataset provider: lumi filtering, PhEDEx/Rucio
    access and the DBS instance to query."""
    dataset_config = config.change_view(
        default_on_change=TriggerResync(['datasets', 'parameters']))
    self._lumi_filter = dataset_config.get_lookup(
        ['lumi filter', '%s lumi filter' % datasource_name],
        default={}, parser=parse_lumi_filter, strfun=str_lumi)
    if not self._lumi_filter.empty():
        config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasource_name, dataset_expr,
                          dataset_nick, dataset_proc)
    # LumiDataProcessor instantiated in DataProcessor.__ini__ will set lumi metadata as well
    self._lumi_query = dataset_config.get_bool(
        ['lumi metadata', '%s lumi metadata' % datasource_name],
        default=not self._lumi_filter.empty())
    config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
    # PhEDex blacklist: 'T1_*_Disk nodes allow user jobs - other T1's dont!
    self._phedex_filter = dataset_config.get_filter(
        'phedex sites', '-* T1_*_Disk T2_* T3_*',
        default_matcher='BlackWhiteMatcher', default_filter='StrictListFilter')
    self._only_complete = dataset_config.get_bool('only complete sites', True)
    self._only_valid = dataset_config.get_bool('only valid', True)
    self._allow_phedex = dataset_config.get_bool('allow phedex', True)
    self._location_format = dataset_config.get_enum(
        'location format', CMSLocationFormat, CMSLocationFormat.hostname)
    self._sitedb = CRIC()
    token = AccessToken.create_instance('VomsProxy', create_config(), 'token')
    # the rucio account is derived from the proxy DN
    self._rucio = Client(
        account=self._sitedb.dn_to_username(token.get_fq_user_name()))
    # dataset expression format: path[@instance][#block]
    dataset_expr_parts = split_opt(dataset_expr, '@#')
    (self._dataset_path, self._dataset_instance,
     self._dataset_block_selector) = dataset_expr_parts
    instance_default = dataset_config.get('dbs instance', '')
    self._dataset_instance = self._dataset_instance or instance_default
    if not self._dataset_instance:
        self._dataset_instance = 'prod/global'
    elif '/' not in self._dataset_instance:
        # a bare instance name is qualified under 'prod/'
        self._dataset_instance = 'prod/%s' % self._dataset_instance
    self._dataset_block_selector = self._dataset_block_selector or 'all'
def check_status(self, jobspec):
    """Check Rucio replication-rule status for a job's output files.

    :param jobspec: job spec whose outFiles carry a 'transferID' attribute
    :return: (True, '') when all rules could be queried, else (False, first error)
    """
    # make logger
    tmpLog = self.make_logger(baseLogger,
                              'PandaID={0}'.format(jobspec.PandaID),
                              method_name='check_status')
    tmpLog.debug('start')
    # loop over all files
    allChecked = True
    oneErrMsg = None
    transferStatus = dict()
    for fileSpec in jobspec.outFiles:
        # skip already done
        if fileSpec.status in ['finished', 'failed']:
            continue
        # get transfer ID
        transferID = fileSpec.fileAttributes['transferID']
        if transferID not in transferStatus:
            # get status; each rule is queried only once per call
            try:
                rucioAPI = RucioClient()
                ruleInfo = rucioAPI.get_replication_rule(transferID)
                tmpTransferStatus = ruleInfo['state']
                tmpLog.debug('got state={0} for rule={1}'.format(
                    tmpTransferStatus, transferID))
            except RuleNotFound:
                tmpLog.error('rule {0} not found'.format(transferID))
                tmpTransferStatus = 'FAILED'
            except:
                err_type, err_value = sys.exc_info()[:2]
                errMsg = "{0} {1}".format(err_type.__name__, err_value)
                tmpLog.error(
                    'failed to get status for rule={0} with {1}'.format(
                        transferID, errMsg))
                # set dummy not to lookup again
                tmpTransferStatus = None
                allChecked = False
                # keep one message
                if oneErrMsg is None:
                    oneErrMsg = errMsg
                # NOTE(review): this immediately overwrites the dummy None with
                # 'OK', so a lookup error leaves the file unmarked rather than
                # failed — confirm this is intentional
                tmpTransferStatus = 'OK'
            transferStatus[transferID] = tmpTransferStatus
        # final status
        if transferStatus[transferID] == 'OK':
            fileSpec.status = 'finished'
        elif transferStatus[transferID] in ['FAILED', 'CANCELED']:
            fileSpec.status = 'failed'
    if allChecked:
        return True, ''
    else:
        return False, oneErrMsg
def getDatasetSize(self, datasetName):
    """Sum the byte sizes of all files in a dataset.

    :return: (True, total bytes), (None, 'dataset not found'),
             or (False, error string)
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    client = RucioClient()
    tSize = 0
    try:
        # the original passed long=long (the py2 builtin type, always truthy;
        # a NameError on py3) — the long listing is not needed to sum 'bytes'
        for x in client.list_files(scope, dsn):
            tSize += x['bytes']
        return True, tSize
    except DataIdentifierNotFound:
        return None, 'dataset not found'
    except Exception:  # narrowed from bare except
        errtype, errvalue = sys.exc_info()[:2]
        errMsg = '{0} {1}'.format(errtype.__name__, errvalue)
        return False, errMsg
def listDatasetsInContainer(self, containerName):
    """List the datasets directly contained in a container.

    :return: (['scope:name', ...], '') or (None, error string)
    """
    result = []
    # extract scope from dataset (a trailing '/' is stripped)
    scope, cn = self.extract_scope(containerName)
    if cn.endswith('/'):
        cn = cn[:-1]
    try:
        # get dids
        client = RucioClient()
        for i in client.list_content(scope, cn):
            if i['type'] == 'DATASET':
                result.append(str('%s:%s' % (i['scope'], i['name'])))
        return result, ''
    except Exception:  # narrowed from bare except; contract: never raise
        errType, errVale = sys.exc_info()[:2]
        return None, '%s %s' % (errType, errVale)
def deleteDatasetReplicas(self, datasetName, locations):
    """Delete this account's replication rules for a dataset at given locations.

    :param datasetName: dataset whose rules are examined
    :param locations: collection of RSE expressions whose rules are removed
    :return: (True, '') on success or if the dataset is gone,
             (False, error string) otherwise
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    client = RucioClient()
    try:
        # use the local client consistently (the original created `client`
        # but listed/compared through self.client)
        for rule in client.list_did_rules(scope, dsn):
            if rule['account'] != client.account:
                continue
            if rule['rse_expression'] in locations:
                client.delete_replication_rule(rule['id'])
    except DataIdentifierNotFound:
        # dataset already gone: nothing to delete
        pass
    except Exception:  # narrowed from bare except
        errtype, errvalue = sys.exc_info()[:2]
        errMsg = '{0} {1}'.format(errtype.__name__, errvalue)
        return False, errMsg
    return True, ''
def getNumberOfFiles(self, datasetName, presetScope=None):
    """Count the files in a dataset.

    :param datasetName: dataset name, optionally prefixed with "scope:"
    :param presetScope: explicit scope overriding the one extracted from the name
    :return: (True, <number of files>) on success,
             (None, 'dataset not found') when the DID does not exist,
             (False, <error message>) on failure
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    if presetScope is not None:
        scope = presetScope
    client = RucioClient()
    try:
        # long=True requests the detailed listing; the original passed the
        # Python-2 builtin `long` (truthy on py2, NameError on py3)
        nFiles = sum(1 for _ in client.list_files(scope, dsn, long=True))
        return True, nFiles
    except DataIdentifierNotFound:
        return None, 'dataset not found'
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        errMsg = '{0} {1}'.format(errtype.__name__, errvalue)
        return False, errMsg
def eraseDataset(self, dsn, scope=None, grace_period=None):
    """Schedule a dataset for deletion by setting its Rucio lifetime.

    :param dsn: dataset name, optionally prefixed with "scope:"
    :param scope: explicit scope overriding the one extracted from dsn
    :param grace_period: hours to wait before deletion; immediate-ish if None
    :return: (True, '') on success, (False, <error message>) on failure
    """
    presetScope = scope
    # register dataset
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(dsn)
        if presetScope is not None:
            scope = presetScope
        # lifetime is expressed in seconds; a tiny positive value makes the
        # dataset eligible for deletion almost immediately
        lifetime = grace_period * 60 * 60 if grace_period is not None else 0.0001
        client.set_metadata(scope=scope, name=dsn, key='lifetime', value=lifetime)
    except:
        excType, excValue = sys.exc_info()[:2]
        return False, '%s %s' % (excType, excValue)
    return True, ''
def listDatasetReplicas(self, datasetName):
    """List dataset replicas per RSE.

    :param datasetName: dataset name, optionally prefixed with "scope:"
    :return: (0, {rse: [{'total':..,'found':..,'immutable':1}]}) on success,
             (1, <error message>) on failure
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    try:
        # get replicas
        client = RucioClient()
        replicaMap = {}
        for replica in client.list_dataset_replicas(scope, dsn):
            replicaMap[replica["rse"]] = [{'total': replica["length"],
                                           'found': replica["available_length"],
                                           'immutable': 1}]
        return 0, replicaMap
    except:
        excType, excValue = sys.exc_info()[:2]
        return 1, '%s %s' % (excType, excValue)
def check_status(self, jobspec):
    """Check the Rucio replication-rule state for a job's output files.

    Each pending output file's rule (fileAttributes['transferID']) is looked
    up once; results are cached in transferStatus to avoid repeated queries.

    :param jobspec: job specification whose outFiles are inspected/updated
    :return: (True, '') when all lookups succeeded,
             (False, <first error message>) otherwise
    """
    # make logger
    tmpLog = self.make_logger(baseLogger, 'PandaID={0}'.format(jobspec.PandaID),
                              method_name='check_status')
    tmpLog.debug('start')
    # loop over all files
    allChecked = True
    oneErrMsg = None
    transferStatus = dict()
    for fileSpec in jobspec.outFiles:
        # skip already don
        if fileSpec.status in ['finished', 'failed']:
            continue
        # get transfer ID
        transferID = fileSpec.fileAttributes['transferID']
        if transferID not in transferStatus:
            # get status
            try:
                rucioAPI = RucioClient()
                ruleInfo = rucioAPI.get_replication_rule(transferID)
                tmpTransferStatus = ruleInfo['state']
                tmpLog.debug('got state={0} for rule={1}'.format(tmpTransferStatus, transferID))
            except RuleNotFound:
                # missing rule counts as a failed transfer
                tmpLog.error('rule {0} not found'.format(transferID))
                tmpTransferStatus = 'FAILED'
            except:
                err_type, err_value = sys.exc_info()[:2]
                errMsg = "{0} {1}".format(err_type.__name__, err_value)
                tmpLog.error('failed to get status for rule={0} with {1}'.format(transferID, errMsg))
                # set dummy not to lookup again
                tmpTransferStatus = None
                allChecked = False
                # keep one message
                if oneErrMsg is None:
                    oneErrMsg = errMsg
                # NOTE(review): overwrites the None sentinel just set, so a
                # failed lookup still marks the file 'finished' below even
                # though allChecked is False — verify this is intentional
                tmpTransferStatus = 'OK'
            transferStatus[transferID] = tmpTransferStatus
        # final status
        if transferStatus[transferID] == 'OK':
            fileSpec.status = 'finished'
        elif transferStatus[transferID] in ['FAILED', 'CANCELED']:
            fileSpec.status = 'failed'
    if allChecked:
        return True, ''
    else:
        return False, oneErrMsg
def registerFilesInDataset(self, idMap):
    """Attach files to datasets, optionally declaring a replica location.

    :param idMap: {rse: {datasetName: [fileAttrDict, ...]}} where each
                  fileAttrDict carries at least 'lfn', 'guid', 'size',
                  'checksum' and optionally 'events', 'lumiblocknr', 'surl'
    :return: the result of Client.add_files_to_datasets
    """
    # loop over all rse
    attachmentList = []
    # items() instead of the Python-2-only iteritems()
    for rse, tmpMap in idMap.items():
        # loop over all datasets
        for datasetName, fileList in tmpMap.items():
            # extract scope from dataset
            scope, dsn = self.extract_scope(datasetName)
            files = []
            for tmpFile in fileList:
                # extract scope from LFN if available
                lfn = tmpFile['lfn']
                if ':' in lfn:
                    s, lfn = lfn.split(':')
                else:
                    s = scope
                # set metadata
                meta = {'guid': tmpFile['guid']}
                if 'events' in tmpFile:
                    meta['events'] = tmpFile['events']
                if 'lumiblocknr' in tmpFile:
                    meta['lumiblocknr'] = tmpFile['lumiblocknr']
                # set mandatory fields (renamed to avoid shadowing builtin 'file')
                did = {'scope': s,
                       'name': lfn,
                       'bytes': tmpFile['size'],
                       'meta': meta}
                checksum = tmpFile['checksum']
                if checksum.startswith('md5:'):
                    did['md5'] = checksum[4:]
                elif checksum.startswith('ad:'):
                    did['adler32'] = checksum[3:]
                if 'surl' in tmpFile:
                    did['pfn'] = tmpFile['surl']
                # append files
                files.append(did)
            # add attachment
            attachment = {'scope': scope,
                          'name': dsn,
                          'dids': files}
            if rse is not None:
                attachment['rse'] = rse
            attachmentList.append(attachment)
    # add files
    client = RucioClient()
    return client.add_files_to_datasets(attachmentList, ignore_duplicate=True)
def registerContainer(self, cname, datasets=None, presetScope=None):
    """Register a container and attach datasets to it.

    :param cname: container name (a trailing '/' is stripped)
    :param datasets: optional list of dataset names to attach
    :param presetScope: explicit scope overriding the one extracted from cname
    :return: True
    """
    # avoid the original mutable default argument (datasets=[])
    if datasets is None:
        datasets = []
    if cname.endswith('/'):
        cname = cname[:-1]
    # register container
    client = RucioClient()
    try:
        scope, dsn = self.extract_scope(cname)
        if presetScope is not None:
            scope = presetScope
        client.add_container(scope=scope, name=cname)
    except DataIdentifierAlreadyExists:
        # already registered; attaching below is still fine
        pass
    # add files
    if len(datasets) > 0:
        try:
            dsns = []
            for ds in datasets:
                ds_scope, ds_name = self.extract_scope(ds)
                if ds_scope:
                    dsn = {'scope': ds_scope, 'name': ds_name}
                else:
                    dsn = {'scope': scope, 'name': ds}
                dsns.append(dsn)
            client.add_datasets_to_container(scope=scope, name=cname, dsns=dsns)
        except DuplicateContent:
            # bulk attach failed on a duplicate: retry one-by-one,
            # skipping the datasets that are already attached
            for ds in dsns:
                try:
                    client.add_datasets_to_container(scope=scope, name=cname, dsns=[ds])
                except DuplicateContent:
                    pass
    return True
def registerFilesInDataset(self, idMap, filesWoRSEs=None):
    """Attach files to datasets, chunking each attachment request.

    Files are split into those registered with a replica at the RSE and
    those attached without one; each group is sent in chunks of 100 dids.

    :param idMap: {rse: {datasetName: [fileAttr, ...]}}
    :param filesWoRSEs: optional collection of LFNs to attach without an RSE
    :return: True
    """
    chunkSize = 100  # dids per attachment request (kept from original)
    # loop over all rse
    attachmentList = []
    # items() instead of the Python-2-only iteritems()
    for rse, tmpMap in idMap.items():
        # loop over all datasets
        for datasetName, fileList in tmpMap.items():
            # extract scope from dataset
            scope, dsn = self.extract_scope(datasetName)
            filesWithRSE = []
            filesWoRSE = []
            for tmpFile in fileList:
                # convert file attribute (renamed to avoid shadowing builtin 'file')
                fileDict = self.convFileAttr(tmpFile, scope)
                # append files
                if rse is not None and (filesWoRSEs is None or fileDict['name'] not in filesWoRSEs):
                    filesWithRSE.append(fileDict)
                else:
                    # no replica location: a pfn would be meaningless
                    if 'pfn' in fileDict:
                        del fileDict['pfn']
                    filesWoRSE.append(fileDict)
            # add attachments in chunks
            for start in range(0, len(filesWithRSE), chunkSize):
                attachmentList.append({'scope': scope,
                                       'name': dsn,
                                       'dids': filesWithRSE[start:start + chunkSize],
                                       'rse': rse})
            for start in range(0, len(filesWoRSE), chunkSize):
                attachmentList.append({'scope': scope,
                                       'name': dsn,
                                       'dids': filesWoRSE[start:start + chunkSize]})
    # add files
    client = RucioClient()
    client.add_files_to_datasets(attachmentList, ignore_duplicate=True)
    return True
def deleteDatasetReplicas(self, datasetName):
    """Collect the RSEs covered by this dataset's incomplete rules.

    Rules whose state is not 'OK' contribute their RSE expression; each
    expression is expanded to concrete RSE names.

    :param datasetName: dataset name, optionally prefixed with "scope:"
    :return: (True, <list of RSE names>) on success (empty on missing DID),
             (False, <error message>) on failure
    """
    # extract scope from dataset
    scope, dsn = self.extract_scope(datasetName)
    client = RucioClient()
    rse_expressions = []
    list_rses = []
    result = []
    try:
        # use the local client consistently (the original mixed self.client
        # and the local one)
        for rule in client.list_did_rules(scope, dsn):
            if rule['state'] != 'OK' and rule['rse_expression'] not in rse_expressions:
                rse_expressions.append(rule['rse_expression'])
        for rse_expression in rse_expressions:
            for rse in client.list_rses(rse_expression):
                # compare the RSE *name*; the original compared the whole
                # dict against a list of names, so the test never matched
                # and duplicates could slip in
                if rse['rse'] not in list_rses:
                    list_rses.append(rse['rse'])
        result = list_rses
    except DataIdentifierNotFound:
        pass
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        errMsg = '{0} {1}'.format(errtype.__name__, errvalue)
        return False, errMsg
    return True, result
def finger(self, userName):
    """Look up the Rucio USER account matching an identity (DN).

    Falls back to the identity with trailing '/CN=<digits>' proxy suffixes
    stripped when the first lookup finds nothing.

    :param userName: identity (typically an X509 DN)
    :return: (True, {'nickname':.., 'email':..}) when an account is found,
             (False, None) when none is found,
             (False, <error message>) on failure
    """
    # initialize before the try block: the original assigned these inside,
    # so an early exception (e.g. in RucioClient()) caused an
    # UnboundLocalError at the return statement
    retVal = False
    userInfo = None
    try:
        # get rucio API
        client = RucioClient()
        for i in client.list_accounts(account_type='USER', identity=userName):
            userInfo = {'nickname': i['account'],
                        'email': i['email']}
            break
        if userInfo is None:
            # remove /CN=\d (raw string; the original relied on '\d' passing
            # through un-escaped)
            userName = re.sub(r'(/CN=\d+)+$', '', userName)
            for i in client.list_accounts(account_type='USER', identity=userName):
                userInfo = {'nickname': i['account'],
                            'email': i['email']}
                break
        if userInfo is not None:
            retVal = True
    except Exception:
        errtype, errvalue = sys.exc_info()[:2]
        errMsg = '{0} {1}'.format(errtype.__name__, errvalue)
        userInfo = errMsg
    return retVal, userInfo
class ruciowrapper(object):
    """Thin wrapper around the Rucio Client with a DB-backed DN->account cache."""

    # configure the Rucio client environment at class-definition time
    # (module-import side effect) so Client() below can authenticate
    if 'RUCIO_ACCOUNT' not in os.environ:
        os.environ['RUCIO_ACCOUNT'] = get_rucio_account()
    if 'X509_USER_PROXY' not in os.environ:
        os.environ['X509_USER_PROXY'] = get_x509_proxy()
    # stays None when client initialization fails; checked before use
    client = None

    def __init__(self):
        # best-effort initialization: callers must tolerate client is None
        try:
            self.client = Client()
        except Exception as e:
            logging.error('Failed to initiate Rucio client:' + str(e))

    def getRucioAccountByDN(self, DN):
        """Return the Rucio account name(s) for a certificate DN.

        Cached rows in RucioAccounts are used when fresher than 7 days;
        otherwise Rucio is queried and the cache refreshed.

        :param DN: certificate distinguished name
        :return: list of account-name strings (possibly empty)
        """
        values = ['rucio_account', 'create_time']
        accounts = []
        accounts.extend(RucioAccounts.objects.filter(certificatedn=DN).values(*values))
        accountExists = len(accounts)
        # refresh from Rucio when nothing is cached or the cache is stale (>7 days)
        if accountExists == 0 or (timezone.now() - accounts[0]['create_time']) > timedelta(days=7):
            if not self.client is None:
                try:
                    accounts = [account['account'] for account in
                                self.client.list_accounts(account_type='USER', identity=DN)]
                except Exception as e:
                    logging.error('Failed to get accounts' + str(e))
                    return accounts
                if len(accounts) > 0:
                    # NOTE(review): deleting only when accountExists == 0 looks
                    # inverted — stale rows (accountExists > 0) are never purged
                    # before the save below, which would accumulate duplicates;
                    # confirm the intended condition
                    if (accountExists == 0):
                        RucioAccounts.objects.filter(certificatedn=DN).delete()
                    for account in accounts:
                        accountRow = RucioAccounts(
                            rucio_account = account,
                            certificatedn = DN,
                            create_time = timezone.now().date(),
                        )
                        accountRow.save()
        else:
            # cache hit: unwrap the stored account names
            accounts = [account['rucio_account'] for account in accounts]
        return accounts
# - Vincent Garonne, <*****@*****.**>, 2013 from rucio.client import Client from rucio.common.exception import Duplicate if __name__ == '__main__': info = [] f = open ('tools/atlas_accounts.csv') # f = open ('user_accounts.csv') for line in f.readlines(): account, dn, email = line.rstrip().split('\t') info.append((account, dn, email)) f.close() c = Client() for account, dn, email in info: try: c.add_account(account=account, type='USER') except Duplicate: print 'Account %(account)s already added' % locals() try: c.add_identity(account=account, identity=dn, authtype='X509', email=email , default=True) except Duplicate: print 'Identity %(account)s already added' % locals() try: scope = 'user.' + account c.add_scope(account, scope) except Duplicate:
"""Ad-hoc script: set a fresh GUID on a Rucio DID.

Targets the hard-coded scope/name below; earlier replica-listing and
rule-creation experiments are kept as commented reference only.
"""
import sys

from rucio.client import Client as RucioClient
from rucio.common.utils import generate_uuid

client = RucioClient()

# scope = sys.argv[1]
# name = sys.argv[2]
# rep.list_replicas([{'scope':'ams-2011B-ISS.B620-pass4', 'name':'1340252898.00981893.root'}])
# for x in rep.list_replicas([{'scope':scope, 'name':name}]):
#     print x

account = 'chenghsi'
scope = 'ams-user-chenghsi'
name = 'file20150709T105442'

# client.add_replication_rule(dids=[{'scope': 'ams-user-chenghsi', 'name':'file20150709T105442'}],copies=1,rse_expression='EOS01_AMS02SCRATCHDISK',weight=None,
#                             lifetime=1, grouping='DATASET', account=account, locked=False, notify='N',ignore_availability=True)

# removed leftover debugger breakpoint (import pdb; pdb.set_trace())
# that halted the script before the metadata update
# for key, value in client.get_metadata(scope, name).iteritems():
#     print key, value
client.set_metadata(scope, name, 'guid', generate_uuid())
#!/usr/bin/env python
# Copyright European Organization for Nuclear Research (CERN)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# You may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
#
# Authors:
# - Vincent Garonne, <*****@*****.**>, 2013

from rucio.client import Client
from rucio.common.exception import Duplicate

if __name__ == '__main__':
    # read one scope name per line; the context manager guarantees the
    # file is closed even if reading fails (original used open/close)
    with open('tools/scopes.csv') as f:
        scopes = [line.rstrip() for line in f.readlines()]
    c = Client()
    for scope in scopes:
        try:
            # register each scope under the ddmusr01 account
            c.add_scope('ddmusr01', scope)
        except Duplicate:
            # parenthesized print works identically on Python 2 and 3
            print('Scope %(scope)s already added' % locals())