def __init__(self, acct, hostUrl=None, authUrl=None, configDict=None):
    """
    Constructs a Rucio object with the Client object embedded.
    In order to instantiate a Rucio Client object, it assumes the host has
    a proper rucio configuration file, where the default host and
    authentication host URL come from, as well as the X509 certificate
    information.
    :param acct: rucio account to be used
    :param hostUrl: defaults to the rucio config one
    :param authUrl: defaults to the rucio config one
    :param configDict: dictionary with extra parameters
    """
    configDict = configDict or {}
    params = configDict.copy()
    params.setdefault('ca_cert', None)
    params.setdefault('auth_type', None)
    params.setdefault('creds', None)
    params.setdefault('timeout', 600)
    params.setdefault('user_agent', 'wmcore-client')

    self.logger = params.get("logger", logging.getLogger())
    # yield output compatible with the PhEDEx service class
    self.phedexCompat = params.get("phedexCompatible", True)

    msg = "WMCore Rucio initialization with acct: %s, host: %s, auth: %s" % (acct, hostUrl, authUrl)
    msg += " and these extra parameters: %s" % params
    self.logger.info(msg)
    self.cli = Client(rucio_host=hostUrl, auth_host=authUrl, account=acct,
                      ca_cert=params['ca_cert'], auth_type=params['auth_type'],
                      creds=params['creds'], timeout=params['timeout'],
                      user_agent=params['user_agent'])
    clientParams = {}
    for k in ("host", "auth_host", "auth_type", "account", "user_agent",
              "ca_cert", "creds", "timeout", "request_retries"):
        clientParams[k] = getattr(self.cli, k)
    self.logger.info("Rucio client initialization with: %s", clientParams)
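# Hedged usage sketch for the constructor above. Assumptions: the enclosing
# class is WMCore's `Rucio` wrapper and the account and host/auth URLs below
# are placeholders, not real endpoints.
if __name__ == '__main__':
    import logging
    logging.basicConfig(level=logging.INFO)
    rucio = Rucio('wma_test',
                  hostUrl='http://cms-rucio.example.com',
                  authUrl='https://cms-rucio-auth.example.com',
                  configDict={'phedexCompatible': False})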
def __init__(self):
    self.default_quota = (10 ** 6)  # 1 MB for testing
    self.client = Client()
    self.CRIC_USERS_API = 'https://cms-cric.cern.ch/api/accounts/user/query/list/?json'
    with open('config_institute_policy.json') as policy_file:
        self._policy = json.load(policy_file)
def modify_protocol(args):
    from rucio.client import Client

    client = Client(account="transfer_ops")
    changed = []
    for protocol in client.get_protocols(args.rse[0]):
        if protocol[u'scheme'] in args.scheme:
            domains = protocol[u'domains']
            if args.wan_all:
                if domains[u'wan'] == DOMAIN_ALL:
                    continue
                changed.append(protocol)
                domains[u'wan'] = DOMAIN_ALL
            elif args.wan_read:
                if domains[u'wan'] == DOMAIN_READ:
                    continue
                changed.append(protocol)
                domains[u'wan'] = DOMAIN_READ
            ok = client.update_protocols(
                args.rse[0],
                protocol[u'scheme'],
                {"domains": domains},
                hostname=protocol[u'hostname'],
                port=protocol[u'port'],
            )
            if not ok:
                raise RuntimeError("Failed to update protocol")
    if len(changed):
        print("Successfully changed protocols")
    else:
        print("No protocols were modified")
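# The DOMAIN_ALL / DOMAIN_READ constants referenced above are defined elsewhere
# in this script. A plausible shape, following Rucio's per-operation priority
# convention for protocol domains (non-zero enables an operation) -- an
# assumption, not the verified originals:
DOMAIN_ALL = {u'read': 1, u'write': 1, u'delete': 1, u'third_party_copy': 1}
DOMAIN_READ = {u'read': 1, u'write': 0, u'delete': 0, u'third_party_copy': 0}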
def upload_dataset_yaml(yamlfile: str):
    """
    Same as upload_dataset, with the arguments specified in a YAML file
    """
    with open(yamlfile) as f:
        yaml_string = f.read()
    try:
        params = load(yaml_string=yaml_string, schema=datasetSchema)
    except YAMLValidationError as e:
        print(e)
        raise typer.Exit()
    params = params.data
    rucio_client = Client()
    dataset_path = params["specs"]["datasetPath"]
    dataset_name = params["specs"]["datasetName"]
    rule_params = params["specs"]["options"]["rule"]
    upload_params = params["specs"]["options"]["upload"]
    upload_dataset_and_create_rule(rucio_client, dataset_path, dataset_name,
                                   rule_params, upload_params)
def run(total_workers=1, once=False, inputfile=None, sleep_time=-1):
    """
    Starts up the automatix threads.
    """
    setup_logging()

    if rucio.db.sqla.util.is_old_db():
        raise exception.DatabaseException('Database was not updated, daemon won\'t start')

    try:
        sites = [s.strip() for s in config_get('automatix', 'sites').split(',')]
    except (NoOptionError, NoSectionError, RuntimeError):
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    if sleep_time == -1:
        try:
            sleep_time = config_get('automatix', 'sleep_time')
        except (NoOptionError, NoSectionError, RuntimeError):
            sleep_time = 30
    try:
        account = config_get('automatix', 'account')
    except (NoOptionError, NoSectionError, RuntimeError):
        account = 'root'
    try:
        dataset_lifetime = config_get('automatix', 'dataset_lifetime')
    except (NoOptionError, NoSectionError, RuntimeError):
        dataset_lifetime = None
    try:
        set_metadata = config_get('automatix', 'set_metadata')
    except (NoOptionError, NoSectionError, RuntimeError):
        set_metadata = False
    try:
        scope = config_get('automatix', 'scope')
        client = Client()
        filters = {'scope': InternalScope('*', vo=client.vo)}
        if InternalScope(scope, vo=client.vo) not in list_scopes(filter_=filters):
            logging.log(logging.ERROR, 'Scope %s does not exist. Exiting', scope)
            GRACEFUL_STOP.set()
    except Exception:
        scope = False
    threads = list()
    for worker_number in range(0, total_workers):
        kwargs = {'worker_number': worker_number,
                  'total_workers': total_workers,
                  'once': once,
                  'sites': sites,
                  'sleep_time': sleep_time,
                  'account': account,
                  'inputfile': inputfile,
                  'set_metadata': set_metadata,
                  'scope': scope,
                  'dataset_lifetime': dataset_lifetime}
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))
    [thread.start() for thread in threads]
    while threads[0].is_alive():
        logging.log(logging.DEBUG, 'Still %i active threads', len(threads))
        [thread.join(timeout=3.14) for thread in threads]
def getNativeRucioClient(config=None, logger=None):
    """
    instantiates a Rucio python Client for use in CRAB TaskWorker
    :param config: a TaskWorker configuration object in which at least the variables used below are defined
    :param logger: a valid logger instance
    :return: a Rucio Client object
    """
    logger.info("Initializing native Rucio client")
    from rucio.client import Client
    nativeClient = Client(rucio_host=config.Services.Rucio_host,
                          auth_host=config.Services.Rucio_authUrl,
                          ca_cert=config.Services.Rucio_caPath,
                          account=config.Services.Rucio_account,
                          creds={"client_cert": config.TaskWorker.cmscert,
                                 "client_key": config.TaskWorker.cmskey},
                          auth_type='x509')
    ret = nativeClient.ping()
    logger.info("Rucio server v.%s contacted", ret['version'])
    ret = nativeClient.whoami()
    logger.info("Rucio client initialized for %s in status %s", ret['account'], ret['status'])
    return nativeClient
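# Hedged sketch of the configuration object getNativeRucioClient() consumes.
# Attribute names are taken from the function body; all values are placeholders,
# and SimpleNamespace stands in for the real TaskWorker configuration class.
import logging
from types import SimpleNamespace

config = SimpleNamespace(
    Services=SimpleNamespace(
        Rucio_host='https://cms-rucio.example.com',          # placeholder endpoint
        Rucio_authUrl='https://cms-rucio-auth.example.com',  # placeholder endpoint
        Rucio_caPath='/etc/grid-security/certificates',
        Rucio_account='crab_server',                         # placeholder account
    ),
    TaskWorker=SimpleNamespace(
        cmscert='/path/to/servicecert.pem',
        cmskey='/path/to/servicekey.pem',
    ),
)
# client = getNativeRucioClient(config=config, logger=logging.getLogger(__name__))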
def modify_protocol(args):
    from rucio.client import Client

    client = Client(account="transfer_ops")
    changed = []
    for protocol in client.get_protocols(args.rse[0]):
        if protocol[u'scheme'] in args.scheme:
            domains = protocol[u'domains']
            if args.wan_read is not None:
                changed.append(protocol)
                domains[u'wan'][u'read'] = args.wan_read
            if args.wan_write is not None:
                changed.append(protocol)
                domains[u'wan'][u'write'] = args.wan_write
            if args.wan_tpc is not None:
                changed.append(protocol)
                domains[u'wan'][u'third_party_copy'] = args.wan_tpc
            if args.wan_delete is not None:
                changed.append(protocol)
                domains[u'wan'][u'delete'] = args.wan_delete
            ok = client.update_protocols(
                args.rse[0],
                protocol[u'scheme'],
                {"domains": domains},
                hostname=protocol[u'hostname'],
                port=protocol[u'port'],
            )
            if not ok:
                raise RuntimeError("Failed to update protocol")
    if len(changed):
        print("Successfully changed protocols")
    else:
        print("No protocols were modified")
def generate_desired(gridjobspec):
    c = Client()
    scope, name = gridjobspec['inDS'].split(':', 1)
    nFilesPerJob = gridjobspec.get('nFilesPerJob', 3)
    files = sorted(list(c.list_files(scope, name)))
    log.warning('files %s', len(files))

    def chunks(l, n):
        for i in range(0, len(l), n):
            yield l[i:i + n]

    filelists = list(chunks(files, nFilesPerJob))
    jobtemplate = json.load(open('slicejob_template.json'))
    configmaps = []
    jobs = []
    for index, fl in enumerate(filelists):
        cmapname = 'task-{taskid}-{index}-config'.format(taskid=gridjobspec['taskid'], index=index)
        jobname = 'task-{taskid}-{index}-job'.format(taskid=gridjobspec['taskid'], index=index)
        namespace = 'default'
        jobconfig = {
            "dids": sorted([':'.join([x['scope'], x['name']]) for x in fl]),
            "exec_template": gridjobspec['exec_template'],
            "outputs": gridjobspec['outputs'],
            "taskid": gridjobspec['taskid'],
            "subjobid": index,
            "user": gridjobspec['user']
        }
        configmap = {
            'apiVersion': 'v1',
            'kind': 'ConfigMap',
            'metadata': {'name': cmapname, 'namespace': namespace},
            'data': {'jobconfig.json': json.dumps(jobconfig, sort_keys=True)}
        }
        job = json.load(open('slicejob_template.json'))
        job['metadata']['name'] = jobname
        job['metadata']['namespace'] = namespace
        job['spec']['template']['spec']['volumes'][0]['configMap']['name'] = cmapname
        job['spec']['template']['spec']['initContainers'][1]['image'] = gridjobspec['image']
        configmaps.append(configmap)
        jobs.append(job)
    log.warning('jobs: %s cmaps: %s', len(jobs), len(configmaps))
    children = configmaps + jobs
    log.warning('children hash %s',
                hashlib.sha1(json.dumps(children, sort_keys=True)).hexdigest())
    return len(jobs), children
def download(dest_path, did):
    from rucio.client import Client
    from rucio.client.downloadclient import DownloadClient

    client = Client()
    download_client = DownloadClient(client=client, logger=download_logger)
    results = download_client.download_dids([{'did': did, 'base_dir': dest_path}])
    return results
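# Usage sketch for download() above: the DID is passed as a single
# 'scope:name' string, which download_dids() accepts in the 'did' field.
# The destination path and DID below are placeholders.
results = download('/tmp/rucio_downloads', 'user.jdoe:test.file.0001')
for item in results:
    # each result dict reports a per-file transfer state (assumed key name)
    print(item.get('clientState'))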
def main(argv):
    # parameters
    if argv:
        rse_repo_file = argv[0]
    else:
        rse_repo_file = 'etc/rse_repository.json'
    json_data = open(rse_repo_file)
    repo_data = json.load(json_data)
    json_data.close()

    c = Client()
    for rse in repo_data:
        try:
            deterministic = repo_data[rse].get('deterministic', True)
            volatile = repo_data[rse].get('volatile', False)
            region_code = repo_data[rse].get('region_code')
            country_name = repo_data[rse].get('country_name')
            staging_area = repo_data[rse].get('staging_area')
            continent = repo_data[rse].get('continent')
            time_zone = repo_data[rse].get('time_zone')
            ISP = repo_data[rse].get('ISP')
            c.add_rse(rse, deterministic=deterministic, volatile=volatile,
                      region_code=region_code, country_name=country_name,
                      staging_area=staging_area, continent=continent,
                      time_zone=time_zone, ISP=ISP)
        except Duplicate:
            print('%(rse)s already added' % locals())
        except InvalidObject as err:
            print(err)
            continue
        except:
            errno, errstr = sys.exc_info()[:2]
            trcbck = traceback.format_exc()
            print('Interrupted processing with %s %s %s.' % (errno, errstr, trcbck))
        for p_id in repo_data[rse]['protocols']['supported']:
            try:
                p = repo_data[rse]['protocols']['supported'][p_id]
                p['scheme'] = p_id
                c.add_protocol(rse, p)
            except ValueError as e:
                print(rse, e)
            except Duplicate as e:
                print(rse, e)
            except Exception:
                errno, errstr = sys.exc_info()[:2]
                trcbck = traceback.format_exc()
                print('Interrupted processing for %s with %s %s %s.' % (rse, errno, errstr, trcbck))
def run(total_workers=1, once=False, inputfile=None):
    """
    Starts up the automatix threads.
    """
    try:
        sites = [s.strip() for s in get('automatix', 'sites').split(',')]
    except Exception:
        raise Exception('Could not load sites from configuration')
    if not inputfile:
        inputfile = '/opt/rucio/etc/automatix.json'
    try:
        sleep_time = get('automatix', 'sleep_time')
    except Exception:
        sleep_time = 30
    try:
        account = get('automatix', 'account')
    except Exception:
        account = 'root'
    try:
        dataset_lifetime = get('automatix', 'dataset_lifetime')
    except Exception:
        dataset_lifetime = None
    try:
        set_metadata = get('automatix', 'set_metadata')
    except Exception:
        set_metadata = False
    try:
        scope = get('automatix', 'scope')
        client = Client()
        filters = {'scope': InternalScope('*', vo=client.vo)}
        if InternalScope(scope, vo=client.vo) not in list_scopes(filter=filters):
            logging.error('Scope %s does not exist. Exiting', scope)
            GRACEFUL_STOP.set()
    except Exception:
        scope = False
    threads = list()
    for worker_number in range(0, total_workers):
        kwargs = {'worker_number': worker_number,
                  'total_workers': total_workers,
                  'once': once,
                  'sites': sites,
                  'sleep_time': sleep_time,
                  'account': account,
                  'inputfile': inputfile,
                  'set_metadata': set_metadata,
                  'scope': scope,
                  'dataset_lifetime': dataset_lifetime}
        threads.append(threading.Thread(target=automatix, kwargs=kwargs))
    [thread.start() for thread in threads]
    while threads[0].is_alive():
        logging.debug('Still %i active threads', len(threads))
        [thread.join(timeout=3.14) for thread in threads]
def __init__(self):
    try:
        # set up Rucio environment
        os.environ['RUCIO_ACCOUNT'] = DDM_ACCOUNT_NAME
        os.environ['RUCIO_AUTH_TYPE'] = 'x509_proxy'
        os.environ['X509_USER_PROXY'] = self._get_proxy()
        self.ddm_client = Client()
    except CannotAuthenticate as ex:
        logger.critical('DDM: authentication failed: {0}'.format(str(ex)))
    except Exception as ex:
        logger.critical('DDM: initialization failed: {0}'.format(str(ex)))
def __init__(self, account, auth_type=None, exclude=DEFAULT_EXCLUDE_LINKS,
             distance=None, rselist=None):
    if distance is None:
        distance = DEFAULT_DISTANCE_RULES
    self.rcli = Client(account=account, auth_type=auth_type)
    self._get_rselist(rselist)
    self._get_matrix(distance, exclude)
def __init__(self):
    aCTLDMXProcess.__init__(self)
    self.rucio = Client()
    self.rucio_prometheus_port = int(self.arcconf.get(['monitor', 'rucioprometheusport']) or 0)
    if self.rucio_prometheus_port:
        start_http_server(self.rucio_prometheus_port)
        self.collector = aCTRucioCollector()
        REGISTRY.register(self.collector)
    else:
        self.log.info('Prometheus monitoring not enabled')
def __init__(self, config, datasource_name, dataset_expr, dataset_nick=None, dataset_proc=None):
    dataset_config = config.change_view(default_on_change=TriggerResync(['datasets', 'parameters']))
    self._lumi_filter = dataset_config.get_lookup(['lumi filter', '%s lumi filter' % datasource_name],
                                                  default={}, parser=parse_lumi_filter, strfun=str_lumi)
    if not self._lumi_filter.empty():
        config.set('%s processor' % datasource_name, 'LumiDataProcessor', '+=')
    DataProvider.__init__(self, config, datasource_name, dataset_expr, dataset_nick, dataset_proc)
    # The LumiDataProcessor instantiated in DataProcessor.__init__ will set lumi metadata as well
    self._lumi_query = dataset_config.get_bool(['lumi metadata', '%s lumi metadata' % datasource_name],
                                               default=not self._lumi_filter.empty())
    config.set('phedex sites matcher mode', 'ShellStyleMatcher', '?=')
    # PhEDEx blacklist: T1_*_Disk nodes allow user jobs - other T1 nodes don't!
    self._phedex_filter = dataset_config.get_filter('phedex sites', '-* T1_*_Disk T2_* T3_*',
                                                    default_matcher='BlackWhiteMatcher',
                                                    default_filter='StrictListFilter')
    self._only_complete = dataset_config.get_bool('only complete sites', True)
    self._only_valid = dataset_config.get_bool('only valid', True)
    self._allow_phedex = dataset_config.get_bool('allow phedex', True)
    self._location_format = dataset_config.get_enum('location format', CMSLocationFormat,
                                                    CMSLocationFormat.hostname)
    self._sitedb = CRIC()
    token = AccessToken.create_instance('VomsProxy', create_config(), 'token')
    self._rucio = Client(account=self._sitedb.dn_to_username(token.get_fq_user_name()))
    dataset_expr_parts = split_opt(dataset_expr, '@#')
    (self._dataset_path, self._dataset_instance, self._dataset_block_selector) = dataset_expr_parts
    instance_default = dataset_config.get('dbs instance', '')
    self._dataset_instance = self._dataset_instance or instance_default
    if not self._dataset_instance:
        self._dataset_instance = 'prod/global'
    elif '/' not in self._dataset_instance:
        self._dataset_instance = 'prod/%s' % self._dataset_instance
    self._dataset_block_selector = self._dataset_block_selector or 'all'
def submit_transfer_to_rucio(name, source_url, bytes, adler32):
    _LOGGER.info("Here")
    # transfer pre-prod -> prod -> snic
    rucio_client = Client()
    # TODO: scope should be extracted from the path: Top directory
    scope = 'functional_tests'
    try:
        replica = {'scope': scope,
                   'name': name,
                   'pfn': source_url,
                   'bytes': int(bytes),
                   'adler32': adler32}
        _LOGGER.debug('Register replica {}'.format(str(replica)))
        rse = 'NDGF-PREPROD'
        account = 'garvin'
        rucio_client.add_replicas(rse=rse, files=[replica])
        kwargss = [{'rse_expression': 'NDGF-PREPROD',
                    'lifetime': 86400},
                   {'rse_expression': 'NDGF',
                    'source_replica_expression': 'NDGF-PREPROD',
                    'lifetime': 86400},
                   {'rse_expression': 'SNIC',
                    'source_replica_expression': 'NDGF',
                    'lifetime': 86400}]
        for kwargs in kwargss:
            rule = rucio_client.add_replication_rule(dids=[{'scope': scope, 'name': name}],
                                                     account=account,
                                                     copies=1,
                                                     grouping='NONE',
                                                     weight=None,
                                                     locked=False,
                                                     **kwargs)
            _LOGGER.info('Added rule for file to {}: {}'.format(kwargs, rule))
    except:
        _LOGGER.error(traceback.format_exc())
def upload_dataset(rse: str, temp_rse: str, dataset_path: str, dataset_name: str, copies: int, lifetime: int):
    """
    Upload the files in the specified folder, create a dataset containing
    those files, and create a rule for the dataset on the specified RSE
    """
    rucio_client = Client()
    upload_params = {
        "tempRSE": temp_rse,
    }
    rule_params = {"copies": copies, "rse": rse, "lifetime": lifetime}
    upload_dataset_and_create_rule(rucio_client, dataset_path, dataset_name,
                                   rule_params, upload_params)
def client(self):
    """Check if the session to the server is still active and return an instance of RucioClient"""
    try:
        self._client.ping()
        return self._client
    except Exception:
        if not self.useDiracCS:
            self._client = Client(account=self.account)
        else:
            self._client = Client(
                account=self.username,
                rucio_host=self.rucioHost,
                auth_host=self.authHost,
                ca_cert=self.caCertPath,
                auth_type="x509_proxy",
                creds={"client_proxy": self.proxyPath},
                timeout=600,
                user_agent="rucio-clients",
                vo=self.VO,
            )
        self.scopes = self._client.list_scopes()
        return self._client
def upload_file(rse: str, temp_rse: str, lfn: str, file_path: str, copies: int, lifetime: int):
    """
    Upload a single file and create a rule for it on the specified RSE
    """
    typer.echo(f"File {file_path} will be uploaded to {rse}")
    upload_params = {
        "tempRSE": temp_rse,
    }
    rule_params = {"copies": copies, "rse": rse, "lifetime": lifetime}
    rucio_client = Client()
    upload_file_and_create_rule(rucio_client, file_path, lfn, upload_params, rule_params)
def setUp(self):
    self.client = Client()
    self.account = 'root'
    self.scope = 'mc'
    self.rse = 'Mock'
    self.prefix = 'srm://mock.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests'
    scopes = [_ for _ in self.client.list_scopes()]
    if 'other' not in scopes:
        self.client.add_scope(self.account, 'other')
    try:
        self.client.add_container('other', '/belle')
    except DataIdentifierAlreadyExists:
        pass
    if self.scope not in scopes:
        self.client.add_scope(self.account, self.scope)
    if 'ANY' not in self.client.list_rse_attributes(self.rse):
        self.client.add_rse_attribute(self.rse, key='ANY', value=True)
def resolve_replicas(self, files):
    """
    populates fdat.inputddms and fdat.replicas of each entry from `files` list
    fdat.replicas = [(ddmendpoint, replica, ddm_se)]
    ddm_se -- integration logic -- is used to manually form the TURL when ignore_rucio_replicas=True
    (quick stab until all protocols are properly populated in Rucio from AGIS)
    """
    # build list of local ddmendpoints grouped by site
    # load ALL ddmconf
    self.ddmconf.update(self.si.resolveDDMConf([]))
    ddms = {}
    for ddm, dat in self.ddmconf.iteritems():
        ddms.setdefault(dat['site'], []).append(dat)
    for fdat in files:
        # build and order list of local ddms
        ddmdat = self.ddmconf.get(fdat.ddmendpoint)
        if not ddmdat:
            raise Exception("Failed to resolve ddmendpoint by name=%s sent by Panda job, please check configuration. fdat=%s" % (fdat.ddmendpoint, fdat))
        if not ddmdat['site']:
            raise Exception("Failed to resolve site name of ddmendpoint=%s. please check ddm declaration: ddmconf=%s ... fdat=%s" % (fdat.ddmendpoint, self.ddmconf, fdat))
        localddms = ddms.get(ddmdat['site'])
        # sort/filter ddms (as possible input source)
        fdat.inputddms = self._prepare_input_ddm(ddmdat, localddms)
    # load replicas from Rucio
    from rucio.client import Client
    c = Client()
    dids = [dict(scope=e.scope, name=e.lfn) for e in files]
    schemes = ['srm', 'root', 'https', 'gsiftp']
    # Get the replica list
    try:
        replicas = c.list_replicas(dids, schemes=schemes)
    except Exception, e:
        raise PilotException("Failed to get replicas from Rucio: %s" % e, code=PilotErrors.ERR_FAILEDLFCGETREPS)
def get_replicas(scope='user.mlassnig', filename='user.mlassnig.pilot.test.single.hits'):
    c = Client()
    replicas = c.list_replicas(dids=[{'scope': scope, 'name': filename}],
                               schemes=['root']
                               # , client_location={'site': 'MWT2'}
                               )
    res = []
    for replica in replicas:
        r = replica['pfns']
        for p in r:
            res.append(Replica(r[p]['rse'], p))
    return res
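# Each entry yielded by list_replicas() carries a 'pfns' dict that maps each
# PFN to metadata including its hosting RSE; the helper above flattens that
# into external Replica(rse, pfn) objects. A small usage sketch with the
# snippet's own defaults:
for rep in get_replicas():
    print(rep)  # one entry per root-protocol PFN found for the file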
def upload(files, scope, metadata, rse, account, source_dir, worker_number,
           total_workers, dataset_lifetime, did=None, set_metadata=False):
    logging.debug('In upload')
    dsn = None
    if did:
        dsn = {'scope': did.split(':')[0], 'name': did.split(':')[1]}

    client = Client()

    list_files = []
    lfns = []
    prepend_str = 'Thread [%i/%i] : ' % (worker_number, total_workers)
    logging.debug(prepend_str + 'Looping over the files')
    for filename in files:
        fullpath = '%s/%s' % (source_dir, filename)
        size = stat(fullpath).st_size
        checksum = adler32(fullpath)
        logging.info(prepend_str + 'File %s : Size %s , adler32 %s' % (fullpath, str(size), checksum))
        list_files.append({'scope': scope, 'name': filename, 'bytes': size,
                           'adler32': checksum, 'meta': {'guid': generate_uuid()}})
        lfns.append({'name': filename, 'scope': scope, 'filesize': size,
                     'adler32': checksum, 'filename': filename})

    # Physical upload
    logging.info(prepend_str + 'Uploading physically the files %s on %s' % (str(lfns), rse))
    rse_info = rsemgr.get_rse_info(rse)
    try:
        success_upload = True
        for cnt in xrange(0, 3):
            global_status, ret = rsemgr.upload(rse_info, lfns=lfns, source_dir=source_dir)
            logging.info(prepend_str + 'Returned global status : %s, Returned : %s' % (str(global_status), str(ret)))
            if not global_status:
                for item in ret:
                    if (not isinstance(ret[item], FileReplicaAlreadyExists)) and ret[item] is not True:
                        sleep(exp(cnt))
                        success_upload = False
                        logging.error(prepend_str + 'Problem to upload file %s with error %s' % (item, str(ret[item])))
                        break
            else:
                break
        if not success_upload:
            logging.error(prepend_str + 'Upload operation to %s failed, removing leftovers' % (rse))
            rsemgr.delete(rse_info, lfns=lfns)
            return False
    except Exception, error:
        logging.error(prepend_str + '%s' % (str(error)))
        return False
def make_outDS(spec):
    c = Client()
    scope = 'user.{}'.format(spec['user'])
    outputs = spec['outputs']
    taskid = spec['taskid']
    outDS_stub = spec['outDS']
    for out in outputs:
        files = [{'name': x, 'scope': scope}
                 for x in c.list_dids(scope,
                                      {'name': 'user.{user}.{taskid}.*.{output}'.format(
                                          user=spec['user'],
                                          taskid=str(taskid).zfill(8),
                                          output=out)},
                                      type='file')]
        ds_name = '{}_{}'.format(outDS_stub.split(':', 1)[-1], out)
        log.warning('creating outDS {}:{}'.format(scope, ds_name))
        c.add_dataset(scope, ds_name, files=files)
def do_stuff(q, storage, rse, scope, proxy):
    rucio_client = Client()
    s = requests.Session()
    # s.auth = (user,pw)
    s.cert = proxy
    s.verify = False
    urllib3.disable_warnings()
    while True:
        try:
            name, new_file = q.get()
            response = s.get(new_file, headers={'Want-Digest': 'adler32'})
            adler32 = response.headers['Digest'].replace('adler32=', '')
            bytes = response.headers['Content-Length']
            replica = {'scope': scope,
                       'name': name,
                       'pfn': new_file,
                       'bytes': int(bytes),
                       'adler32': adler32}
            rucio_client.add_replicas(rse='DESY-DISCORDIA', files=[replica])
            rucio_client.add_replication_rule(dids=[{'scope': scope, 'name': name}],
                                              account='root',
                                              copies=1,
                                              rse_expression=rse,
                                              grouping='NONE',
                                              weight=None,
                                              lifetime=None,
                                              locked=False)
            _LOGGER.info('Added replica and rule for file ' + scope + ':' + name)
        except:
            _LOGGER.error(traceback.format_exc())
        finally:
            q.task_done()
def create_site_map(rse_map):
    """
    Build a map of traffic between source and destination sites.
    """
    client = Client()
    trafic_map = {}
    for link in rse_map:
        src_site = client.list_rse_attributes(link['src_rse'])['site']
        dst_site = client.list_rse_attributes(link['dst_rse'])['site']
        trafic = int(link['bytes'])
        # map creation site to site
        if src_site in trafic_map.keys():
            if dst_site in trafic_map[src_site].keys():
                trafic_map[src_site][dst_site] += trafic
            else:
                trafic_map[src_site][dst_site] = trafic
        else:
            trafic_map[src_site] = {dst_site: trafic}
    return trafic_map
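# Input sketch for create_site_map(): each link names the source/destination
# RSEs and the transferred bytes; the RSE names here are invented. Site names
# in the returned map come from each RSE's 'site' attribute in Rucio.
example_rse_map = [
    {'src_rse': 'SITE_A_DATADISK', 'dst_rse': 'SITE_B_DATADISK', 'bytes': 10 * 1024 ** 3},
    {'src_rse': 'SITE_A_DATADISK', 'dst_rse': 'SITE_C_DATADISK', 'bytes': 512 * 1024 ** 2},
]
# create_site_map(example_rse_map) -> {'siteA': {'siteB': ..., 'siteC': ...}}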
def get_pfns(rse: str, lfns: list):
    from rucio.client import Client
    pfns = []
    pfn_map = {}
    rucio_client = Client()
    rucio_scope = f'user.{rucio_client.account}'
    # TODO: do we need a check for this?
    try:
        rgx = rucio_client.get_protocols(rse.split("_Temp")[0],
                                         protocol_domain='ALL',
                                         operation="write")[0]
        if not rgx['extended_attributes'] or 'tfc' not in rgx['extended_attributes']:
            pfn_0 = rucio_client.lfns2pfns(rse=rse.split("_Temp")[0],
                                           lfns=[rucio_scope + ":" + lfns[0]],
                                           operation="write")
            pfns.append(pfn_0[rucio_scope + ":" + lfns[0]])
            prefix = pfn_0[rucio_scope + ":" + lfns[0]].split(lfns[0])[0]
            for lfn in lfns:
                pfn_map.update({lfn: prefix + lfn})
        else:
            for lfn in lfns:
                if 'tfc' in rgx['extended_attributes']:
                    tfc = rgx['extended_attributes']['tfc']
                    tfc_proto = rgx['extended_attributes']['tfc_proto']
                    pfn_map.update({lfn: tfc_lfn2pfn(lfn, tfc, tfc_proto)})
    except TypeError:
        raise TypeError('Cannot determine PFN for LFN %s:%s at %s with proto %s'
                        % (rucio_scope, lfn, rse, rgx))
    return pfn_map
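# Usage sketch for get_pfns(); the RSE and LFN below are placeholders. The
# function strips a '_Temp' suffix before the protocol lookup and returns an
# {lfn: pfn} map, built either via lfns2pfns() or the site's TFC rules.
pfn_map = get_pfns('T2_XX_Example_Temp', ['/store/user/jdoe/output_1.root'])
for lfn, pfn in pfn_map.items():
    print(lfn, '->', pfn)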
def __init__(self):
    # Get agent name from /path/to/aCTAgent.py
    self.name = os.path.basename(sys.argv[0])[:-3]

    # logger
    self.logger = aCTLogger.aCTLogger(self.name)
    self.log = self.logger()
    self.criticallogger = aCTLogger.aCTLogger('aCTCritical', arclog=False)
    self.criticallog = self.criticallogger()

    # config
    self.conf = aCTConfig.aCTConfigAPP()
    self.arcconf = aCTConfig.aCTConfigARC()
    self.tmpdir = str(self.arcconf.get(['tmp', 'dir']))

    # database
    self.dbarc = aCTDBArc.aCTDBArc(self.log)
    self.dbldmx = aCTDBLDMX.aCTDBLDMX(self.log)

    # Rucio client
    self.rucio = Client()

    # start time for periodic restart
    self.starttime = time.time()

    self.log.info("Started %s", self.name)
def upload_file_yaml(yamlfile: str):
    """
    Same as upload_file, with the arguments specified in a YAML file
    """
    with open(yamlfile) as f:
        yaml_string = f.read()
    try:
        params = load(yaml_string=yaml_string, schema=fileSchema)
    except YAMLValidationError as e:
        print(e)
        raise typer.Exit()
    params = params.data
    file_path = params["specs"]["filePath"]
    lfn = params["specs"]["lfn"]
    upload_params = params["specs"]["options"]["upload"]
    rule_params = params["specs"]["options"]["rule"]
    rucio_client = Client()
    upload_file_and_create_rule(rucio_client, file_path, lfn, upload_params, rule_params)
def place_replica(once=False, thread=0, did_queue=None, waiting_time=100, dry_run=False, sampling=False,
                  algorithms='t2_free_space_only_pop_with_network', datatypes='NTUP,DAOD',
                  dest_rse_expr='type=DATADISK', max_bytes_hour=100000000000000, max_files_hour=100000,
                  max_bytes_hour_rse=50000000000000, max_files_hour_rse=10000, min_popularity=8,
                  min_recent_requests=5, max_replicas=5, sleep_time=10):
    """
    Thread to run the placement algorithm to decide if and where to put new replicas.
    """
    try:
        c3po_options = config_get_options('c3po')
        client = None

        if 'algorithms' in c3po_options:
            algorithms = config_get('c3po', 'algorithms')
        algorithms = algorithms.split(',')
        if not dry_run:
            if len(algorithms) != 1:
                logging.error('Multiple algorithms are only allowed in dry_run mode')
                return
            client = Client(auth_type='x509_proxy', account='c3po',
                            creds={'client_proxy': '/opt/rucio/etc/ddmadmin.long.proxy'})
        vo = client.vo

        instances = {}
        for algorithm in algorithms:
            module_path = 'rucio.daemons.c3po.algorithms.' + algorithm
            module = __import__(module_path, globals(), locals(), ['PlacementAlgorithm'])
            instance = module.PlacementAlgorithm(datatypes, dest_rse_expr, max_bytes_hour, max_files_hour,
                                                 max_bytes_hour_rse, max_files_hour_rse, min_popularity,
                                                 min_recent_requests, max_replicas)
            instances[algorithm] = instance

        params = {
            'dry_run': dry_run,
            'sampling': sampling,
            'datatypes': datatypes,
            'dest_rse_expr': dest_rse_expr,
            'max_bytes_hour': max_bytes_hour,
            'max_files_hour': max_files_hour,
            'max_bytes_hour_rse': max_bytes_hour_rse,
            'max_files_hour_rse': max_files_hour_rse,
            'min_recent_requests': min_recent_requests,
            'min_popularity': min_popularity
        }

        instance_id = str(uuid4()).split('-')[0]

        elastic_url = config_get('c3po', 'elastic_url')
        elastic_index = config_get('c3po', 'elastic_index')

        ca_cert = False
        if 'ca_cert' in c3po_options:
            ca_cert = config_get('c3po', 'ca_cert')

        auth = False
        if ('elastic_user' in c3po_options) and ('elastic_pass' in c3po_options):
            auth = HTTPBasicAuth(config_get('c3po', 'elastic_user'), config_get('c3po', 'elastic_pass'))

        w = waiting_time
        while not GRACEFUL_STOP.is_set():
            if w < waiting_time:
                w += sleep_time
                sleep(sleep_time)
                continue
            len_dids = did_queue.qsize()

            if len_dids > 0:
                logging.debug('(%s) %d did(s) in queue' % (instance_id, len_dids))
            else:
                logging.debug('(%s) no dids in queue' % (instance_id))

            for _ in range(0, len_dids):
                did = did_queue.get()
                if isinstance(did[0], string_types):
                    did[0] = InternalScope(did[0], vo=vo)
                for algorithm, instance in instances.items():
                    logging.info('(%s:%s) Retrieved %s:%s from queue. Run placement algorithm' % (algorithm, instance_id, did[0], did[1]))
                    decision = instance.place(did)
                    decision['@timestamp'] = datetime.utcnow().isoformat()
                    decision['algorithm'] = algorithm
                    decision['instance_id'] = instance_id
                    decision['params'] = params

                    create_rule = True
                    if sampling and 'error_reason' not in decision:
                        create_rule = bool(ord(md5(decision['did']).hexdigest()[-1]) & 1)
                        decision['create_rule'] = create_rule

                    # write the output to ES for further analysis
                    index_url = elastic_url + '/' + elastic_index + '-' + datetime.utcnow().strftime('%Y-%m') + '/record/'
                    try:
                        if ca_cert:
                            r = post(index_url, data=dumps(decision), verify=ca_cert, auth=auth)
                        else:
                            r = post(index_url, data=dumps(decision))
                        if r.status_code != 201:
                            logging.error(r)
                            logging.error('(%s:%s) could not write to ElasticSearch' % (algorithm, instance_id))
                    except RequestException as e:
                        logging.error('(%s:%s) could not write to ElasticSearch' % (algorithm, instance_id))
                        logging.error(e)
                        continue

                    logging.debug(decision)
                    if 'error_reason' in decision:
                        logging.error('(%s:%s) The placement algorithm ran into an error: %s' % (algorithm, instance_id, decision['error_reason']))
                        continue

                    logging.info('(%s:%s) Decided to place a new replica for %s on %s' % (algorithm, instance_id, decision['did'], decision['destination_rse']))

                    if (not dry_run) and create_rule:
                        # DO IT!
                        try:
                            add_rule(client, {'scope': did[0].external, 'name': did[1]},
                                     decision.get('source_rse'), decision.get('destination_rse'))
                        except exception.RucioException as e:
                            logging.debug(e)
            w = 0
    except Exception as e:
        logging.critical(e)