def test_list_dataset_replicas_bulk(self):
    """ REPLICA (CLIENT): List dataset replicas bulk."""
    replica_client = ReplicaClient()
    rule_client = RuleClient()
    did_client = DIDClient()
    scope = 'mock'

    # Two fresh datasets under the mock scope, replicated to MOCK.
    did1 = {'scope': scope, 'name': 'dataset_' + str(generate_uuid())}
    did_client.add_dataset(**did1)
    did2 = {'scope': scope, 'name': 'dataset_' + str(generate_uuid())}
    did_client.add_dataset(**did2)
    dids = [did1, did2]
    rule_client.add_replication_rule(dids=dids, account='root', copies=1,
                                     rse_expression='MOCK', grouping='DATASET')

    # A malformed DID dictionary must be rejected.
    with pytest.raises(InvalidObject):
        replica_client.list_dataset_replicas_bulk(dids=[{'type': "I'm Different"}])

    replicas = list(replica_client.list_dataset_replicas_bulk(dids=dids))
    assert len(replicas) == 2
    for did in dids:
        # A replica matches the DID when every key of the DID appears in the
        # replica with an identical value.
        matches = [
            rep for rep in replicas
            if all(key in rep and did[key] == rep[key] for key in did)
        ]
        assert matches, "%s must be in returned replicas" % (did, )
def test_list_datasets_per_rse(self):
    """ REPLICA (CLIENT): List datasets in RSE."""
    rule_client = RuleClient()
    did_client = DIDClient()
    scope = 'mock'

    # Create a dataset and pin it to MOCK so the RSE listing has a hit.
    dataset = 'dataset_' + str(generate_uuid())
    did_client.add_dataset(scope=scope, name=dataset)
    rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}],
                                     account='root', copies=1,
                                     rse_expression='MOCK', grouping='DATASET')

    # Wildcard name filter must match the dataset just created.
    rse_id = get_rse_id(rse='MOCK', **self.vo)
    filters = {'scope': InternalScope(scope, **self.vo), 'name': 'data*'}
    replicas = list(list_datasets_per_rse(rse_id=rse_id, filters=filters))
    assert replicas != []
def test_list_dataset_replicas(self):
    """ REPLICA (CLIENT): List dataset replicas."""
    replica_client = ReplicaClient()
    rule_client = RuleClient()
    did_client = DIDClient()
    scope = 'mock'

    # One dataset, one rule on MOCK -> exactly one dataset replica expected.
    dataset = 'dataset_' + str(generate_uuid())
    did_client.add_dataset(scope=scope, name=dataset)
    rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}],
                                     account='root', copies=1,
                                     rse_expression='MOCK', grouping='DATASET')

    replicas = list(replica_client.list_dataset_replicas(scope=scope, name=dataset))
    assert len(replicas) == 1
def test_list_dataset_replicas_archive(self):
    """ REPLICA (CLIENT): List dataset replicas with archives. """
    replica_client = ReplicaClient()
    did_client = DIDClient()
    rule_client = RuleClient()
    scope = 'mock'

    def _xrootd_protocol(hostname, prefix):
        # Both throwaway RSEs expose one xrootd endpoint, fully enabled
        # for read/write/delete on LAN and WAN.
        perms = {'read': 1, 'write': 1, 'delete': 1}
        return {
            'scheme': 'root',
            'hostname': hostname,
            'port': 1409,
            'prefix': prefix,
            'impl': 'rucio.rse.protocols.xrootd.Default',
            'domains': {'lan': dict(perms), 'wan': dict(perms)},
        }

    rse = 'APERTURE_%s' % rse_name_generator()
    rse_id = add_rse(rse, **self.vo)
    add_protocol(rse_id=rse_id,
                 parameter=_xrootd_protocol('root.aperture.com', '//test/chamber/'))

    rse2 = 'BLACKMESA_%s' % rse_name_generator()
    rse2_id = add_rse(rse2, **self.vo)
    add_protocol(rse_id=rse2_id,
                 parameter=_xrootd_protocol('root.blackmesa.com', '//underground/facility'))

    # Register the archive itself on both RSEs.
    archive = {'scope': scope,
               'name': 'another.%s.zip' % generate_uuid(),
               'type': 'FILE',
               'bytes': 2596,
               'adler32': 'deedbeaf'}
    replica_client.add_replicas(rse=rse, files=[archive])
    replica_client.add_replicas(rse=rse2, files=[archive])

    # The constituents exist as plain replicas only on the second RSE,
    # and are declared as contents of the archive.
    archived_files = [{'scope': scope,
                       'name': 'zippedfile-%i-%s' % (i, str(generate_uuid())),
                       'type': 'FILE',
                       'bytes': 4322,
                       'adler32': 'deaddead'} for i in range(2)]
    replica_client.add_replicas(rse=rse2, files=archived_files)
    did_client.add_files_to_archive(scope=scope, name=archive['name'],
                                    files=archived_files)

    # Dataset containing the constituents, ruled onto the first RSE.
    dataset_name = 'find_me.' + str(generate_uuid())
    did_client.add_dataset(scope=scope, name=dataset_name)
    did_client.attach_dids(scope=scope, name=dataset_name, dids=archived_files)
    rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset_name}],
                                     account='root', copies=1,
                                     rse_expression=rse, grouping='DATASET')

    # Shallow listing: only the rule's RSE shows up and it is UNAVAILABLE,
    # since the constituents are not directly there.
    res = list(replica_client.list_dataset_replicas(scope=scope, name=dataset_name))
    assert len(res) == 1
    assert res[0]['state'] == 'UNAVAILABLE'

    # Deep listing resolves archive contents: three available replicas.
    res = list(replica_client.list_dataset_replicas(scope=scope,
                                                    name=dataset_name,
                                                    deep=True))
    assert len(res) == 3
    for entry in res:
        assert entry['state'] == 'AVAILABLE'

    del_rse(rse_id)
class Rucio:
    """Thin convenience wrapper around the Rucio clients for one scope and
    an origin/destination RSE pair.

    NOTE(review): reconstructed from whitespace-mangled source; indentation
    of a few nested statements was inferred — confirm against the original
    repository before relying on exact control flow.
    """

    def __init__(self, myscope, orgRse, destRse, account='bruzzese', working_folder=None):
        self.myscope = myscope
        self.orgRse = orgRse
        self.destRse = destRse
        self.working_folder = working_folder
        self.gfal = Gfal2Context()
        self.didc = DIDClient()
        self.repc = ReplicaClient()
        self.rulesClient = RuleClient()
        # Configuration
        self.account = account
        # account=account
        self.client = Client(account=self.account)

    # Get list of all RSEs
    def rses(self):
        # Collect just the RSE names out of the full listing.
        rses_lists = list()
        for single_rse in list(self.client.list_rses()):
            rses_lists.append(single_rse['rse'])
        return (rses_lists)

    def usage(self, s_rse):
        # First (and only expected) usage record for this account on s_rse.
        return (list(
            self.client.get_local_account_usage(account=self.account,
                                                rse=s_rse))[0])

    def rules(self):
        # All replication rules owned by this account.
        return (list(self.client.list_account_rules(account=self.account)))

    def myfunc(self):
        # Debug helper: print the current configuration.
        print(
            "Hello your setting are account=%s, scope=%s, origin RSE =%s and destination RSE =%s"
            % (self.account, self.myscope, self.orgRse, self.destRse))

    def file_exists(self, pfn):
        # True when the PFN can be stat'ed via gfal; any failure is treated
        # as "does not exist" (bare except swallows all errors).
        try:
            self.gfal.stat(pfn).st_size
            return (True)
        except:
            return (False)

    def get_rse_url(self):
        """ Return the base path of the rucio url """
        rse_settings = rsemgr.get_rse_info(self.orgRse)
        # Only the first protocol of the origin RSE is considered.
        protocol = rse_settings['protocols'][0]
        schema = protocol['scheme']
        prefix = protocol['prefix']
        port = protocol['port']
        rucioserver = protocol['hostname']
        rse_url = list()
        if None not in (schema, str(rucioserver + ':' + str(port)), prefix):
            # urlunsplit expects a 5-tuple: (scheme, netloc, path, query, fragment).
            rse_url.extend(
                [schema, rucioserver + ':' + str(port), prefix, '', ''])
            if self.working_folder != None:
                # Check if our test folder exists
                path = os.path.join(urlunsplit(rse_url), self.working_folder)
                self.gfal.mkdir_rec(path, 775)
                return (path)
            else:
                return (urlunsplit(rse_url))
        else:
            return ('Wrong url parameters')

    def check_replica(self, lfn, dest_rse=None):
        """ Check if a replica of the given file at the site already exists.

        Returns the first PFN at dest_rse when found, False when listing
        succeeded without a match; returns None implicitly on error or
        falsy lfn.
        """
        print('here', self.myscope, lfn, dest_rse)
        if lfn:
            try:
                replicas = list(
                    self.client.list_replicas([{
                        'scope': self.myscope,
                        'name': lfn
                    }], rse_expression=dest_rse))
                if replicas:
                    for replica in replicas:
                        if isinstance(replica, dict):
                            if dest_rse in replica['rses']:
                                path = replica['rses'][dest_rse][0]
                                return (path)
                return (False)
            except:
                # NOTE(review): bare except silently ignores every failure.
                pass

    ############################
    ## Create Metadata for DIDs
    ############################
    def getFileMetaData(self, p_file, origenrse=None):
        """ Get the size and checksum for every file in the run from defined path """
        ''' generate the registration of the file in a RSE :
        :param rse: the RSE name.
        :param scope: The scope of the file.
        :param name: The name of the file.
        :param bytes: The size in bytes.
        :param adler32: adler32 checksum.
        :param pfn: PFN of the file for non deterministic RSE
        :param dsn: is the dataset name.
        '''
        # Sanitize the basename: '/', '%' and '+' are stripped/replaced so the
        # name is safe as a Rucio DID name.
        name = os.path.basename(p_file)
        name = name.replace('/', '')
        name = name.replace('%', '_')
        replica = {
            'scope': self.myscope,
            'name': name.replace('+', '_'),
            'adler32': self.gfal.checksum(p_file, 'adler32'),
            'bytes': self.gfal.stat(p_file).st_size,
            'pfn': p_file,
            "meta": {
                "guid": str(generate_uuid())
            }
        }
        Data = dict()
        Data['replica'] = replica
        Data['scope'] = self.myscope
        return (Data)

    ############################
    ## Create Groups of DIDs
    ############################
    def createDataset(self, new_dataset):
        # Returns True when freshly created, False when it already exists;
        # other error paths return a flask error response or None.
        logger.debug(
            "| - - Checking if a provided dataset exists: %s for a scope %s"
            % (new_dataset, self.myscope))
        try:
            self.client.add_dataset(scope=self.myscope, name=new_dataset)
            return (True)
        except DataIdentifierAlreadyExists:
            return (False)
        except Duplicate as error:
            return generate_http_error_flask(409, 'Duplicate', error.args[0])
        except AccountNotFound as error:
            return generate_http_error_flask(404, 'AccountNotFound',
                                             error.args[0])
        except RucioException as error:
            exc_type, exc_obj, tb = sys.exc_info()
            logger.debug(exc_obj)

    def createcontainer(self, name_container):
        ''' registration of the dataset into a container :
        :param name_container: the container's name
        :param info_dataset : contains,
        the scope: The scope of the file.
        the name: The dataset name.
        '''
        logger.debug("| - - - registering container %s" % name_container)
        try:
            self.client.add_container(scope=self.myscope, name=name_container)
        except DataIdentifierAlreadyExists:
            logger.debug("| - - - Container %s already exists" %
                         name_container)
        except Duplicate as error:
            return generate_http_error_flask(409, 'Duplicate', error.args[0])
        except AccountNotFound as error:
            return generate_http_error_flask(404, 'AccountNotFound',
                                             error.args[0])
        except RucioException as error:
            exc_type, exc_obj, tb = sys.exc_info()
            logger.debug(exc_obj)

    ############################
    ## General funciotn for registering a did into a GROUP of DID (CONTAINER/DATASET)
    ############################
    def registerIntoGroup(self, n_file, new_dataset):
        """ Attaching a DID to a GROUP """
        type_1 = self.client.get_did(scope=self.myscope, name=new_dataset)
        type_2 = self.client.get_did(scope=self.myscope, name=n_file)
        print('attaching ', n_file, new_dataset)
        try:
            self.client.attach_dids(scope=self.myscope,
                                    name=new_dataset,
                                    dids=[{
                                        'scope': self.myscope,
                                        'name': n_file
                                    }])
        except RucioException:
            # Any Rucio error is assumed to mean "already attached".
            logger.debug("| - - - %s already attached to %s" %
                         (type_2['type'], type_1['type']))

    ############################
    ## MAGIC functions
    ############################
    def create_groups(self, organization):
        # Builds the hierarchy replica -> dataset_1 -> container_1 ->
        # container_2 -> container_3, sanitizing '%' in every name.
        #print(organization)
        # 2.1) Create the dataset and containers for the file
        self.createDataset(organization['dataset_1'].replace('%', '_'))
        # 2.1.1) Attach the dataset and containers for the file
        self.registerIntoGroup(organization['replica'].replace('%', '_'),
                               organization['dataset_1'].replace('%', '_'))
        # 2.2) Create the dataset and containers for the file
        self.createcontainer(organization['container_1'].replace('%', '_'))
        # 2.2.1) Attach the dataset and containers for the file
        self.registerIntoGroup(organization['dataset_1'].replace('%', '_'),
                               organization['container_1'].replace('%', '_'))
        # 2.3) Create the dataset and containers for the file
        self.createcontainer(organization['container_2'].replace('%', '_'))
        # 2.3.1) Attach the dataset and containers for the file
        self.registerIntoGroup(organization['container_1'].replace('%', '_'),
                               organization['container_2'].replace('%', '_'))
        # 2.4) Create the dataset and containers for the file
        self.createcontainer(organization['container_3'].replace('%', '_'))
        # 2.4.1) Attach the dataset and containers for the file
        self.registerIntoGroup(organization['container_2'].replace('%', '_'),
                               organization['container_3'].replace('%', '_'))

    ############################
    ## Create Rule for DIDs
    ############################
    def addReplicaRule(self, destRSE, group):
        """ Create a replication rule for one dataset at a destination RSE """
        type_1 = self.client.get_did(scope=self.myscope, name=group)
        logger.debug("| - - - Creating replica rule for %s %s at rse: %s" %
                     (type_1['type'], group, destRSE))
        if destRSE:
            try:
                # Asynchronous rule; replicas are purged when the rule goes.
                rule = self.rulesClient.add_replication_rule(
                    [{
                        "scope": self.myscope,
                        "name": group
                    }],
                    copies=1,
                    rse_expression=destRSE,
                    grouping='ALL',
                    account=self.account,
                    purge_replicas=True,
                    asynchronous=True)
                logger.debug("| - - - - Rule succesfully replicated at %s" %
                             destRSE)
                logger.debug("| - - - - - The %s has the following id %s" %
                             (rule, destRSE))
                return (rule[0])
            except DuplicateRule:
                # Rule already there: locate and log the existing one.
                exc_type, exc_obj, tb = sys.exc_info()
                rules = list(
                    self.client.list_account_rules(account=self.account))
                if rules:
                    for rule in rules:
                        if rule['rse_expression'] == destRSE and rule[
                                'scope'] == self.myscope and rule[
                                    'name'] == group:
                            logger.debug(
                                '| - - - - Rule already exists %s which contains the following DID %s:%s %s'
                                % (rule['id'], self.myscope, group,
                                   str(exc_obj)))
            except ReplicationRuleCreationTemporaryFailed:
                exc_type, exc_obj, tb = sys.exc_info()
                rules = list(
                    self.client.list_account_rules(account=self.account))
                if rules:
                    for rule in rules:
                        if rule['rse_expression'] == destRSE and rule[
                                'scope'] == self.myscope and rule[
                                    'name'] == group:
                            print(
                                '| - - - - Rule already exists %s which contains the following DID %s:%s %s'
                                % (rule['id'], self.myscope, group,
                                   str(exc_obj)))

    def addReplicaRule_noasync(self, destRSE, group):
        """ Create a replication rule for one dataset at a destination RSE """
        # Same as addReplicaRule but the rule is created synchronously
        # (no asynchronous=True flag).
        type_1 = self.client.get_did(scope=self.myscope, name=group)
        logger.debug("| - - - Creating replica rule for %s %s at rse: %s" %
                     (type_1['type'], group, destRSE))
        if destRSE:
            try:
                rule = self.rulesClient.add_replication_rule(
                    [{
                        "scope": self.myscope,
                        "name": group
                    }],
                    copies=1,
                    rse_expression=destRSE,
                    grouping='ALL',
                    account=self.account,
                    purge_replicas=True)
                logger.debug("| - - - - Rule succesfully replicated at %s" %
                             destRSE)
                logger.debug("| - - - - - The %s has the following id %s" %
                             (rule, destRSE))
                return (rule[0])
            except DuplicateRule:
                exc_type, exc_obj, tb = sys.exc_info()
                rules = list(
                    self.client.list_account_rules(account=self.account))
                if rules:
                    for rule in rules:
                        if rule['rse_expression'] == destRSE and rule[
                                'scope'] == self.myscope and rule[
                                    'name'] == group:
                            logger.debug(
                                '| - - - - Rule already exists %s which contains the following DID %s:%s %s'
                                % (rule['id'], self.myscope, group,
                                   str(exc_obj)))
            except ReplicationRuleCreationTemporaryFailed:
                exc_type, exc_obj, tb = sys.exc_info()
                rules = list(
                    self.client.list_account_rules(account=self.account))
                if rules:
                    for rule in rules:
                        if rule['rse_expression'] == destRSE and rule[
                                'scope'] == self.myscope and rule[
                                    'name'] == group:
                            print(
                                '| - - - - Rule already exists %s which contains the following DID %s:%s %s'
                                % (rule['id'], self.myscope, group,
                                   str(exc_obj)))

    ############################
    ## Create Rules for not registered DIDs
    ############################
    def outdated_register_replica(self, filemds, dest_RSE, org_RSE):
        """ Register file replica. """
        # Ephemeral "carrier" dataset used to ferry the outdated replicas.
        carrier_dataset = 'outdated_replication_dataset' + '-' + str(
            uuid.uuid4())
        creation = self.createDataset(carrier_dataset)
        # Make sure your dataset is ephemeral
        self.client.set_metadata(scope=self.myscope,
                                 name=carrier_dataset,
                                 key='lifetime',
                                 value=86400)  # 86400 in seconds = 1 day
        # Create a completly new create the RULE:
        for filemd in filemds:
            outdated = filemd['replica']['name']
            self.registerIntoGroup(outdated, carrier_dataset)
        # Add dummy dataset for replicating at Destination RSE
        # Sometimes Rucio ends up with an error message like this : rucio.common.exception.RuleNotFound: No replication rule found.
        # In order to avoid that nonsense error we do the following loop :
        '''for i in range(0,100):
            while True:
                try:
                    # do stuff
                    rule = self.addReplicaRule(dest_RSE, group=carrier_dataset)
                    if rule != None :
                        rule_child = rule
                except :
                    continue
                break'''
        # Retry up to 10 times until the child (destination) rule is created.
        for i in range(0, 10):
            print(i)
            try:
                # do stuff
                rule = self.addReplicaRule(dest_RSE, group=carrier_dataset)
                if rule != None:
                    rule_child = rule
                    print(rule_child)
                    break
            except:
                print('fail')
                continue
        # Retry up to 10 times until the parent (origin) rule is created.
        for i in range(0, 10):
            print(i)
            try:
                # do stuff
                rule = self.addReplicaRule_noasync(org_RSE,
                                                   group=carrier_dataset)
                if rule != None:
                    rule_parent = rule
                    print(rule_parent)
                    break
            except:
                print('fail')
                continue
        # rule_child = self.addReplicaRule(dest_RSE, group=carrier_dataset)
        # Add dummy dataset for replicating Origin RSE
        # rule_parent = self.addReplicaRule(org_RSE, group=carrier_dataset)
        # NOTE(review): if all retries fail, rule_child/rule_parent are
        # unbound and the next line raises NameError.
        print(rule_child, rule_parent)
        # Create a relation rule between origin and destiny RSE, so that the source data can be deleted
        rule = self.client.update_replication_rule(rule_id=rule_parent,
                                                   options={
                                                       'lifetime': 10,
                                                       'child_rule_id':
                                                       rule_child,
                                                       'purge_replicas': True
                                                   })
        logger.debug(
            '| - - - - Creating relationship between parent %s and child %s : %s'
            % (rule_parent, rule_child, rule))
        # Create a relation rule between the destinity rule RSE with itself, to delete the dummy rule, whiles keeping the destiny files
        rule = self.client.update_replication_rule(rule_id=rule_child,
                                                   options={
                                                       'lifetime': 10,
                                                       'child_rule_id':
                                                       rule_child
                                                   })
        logger.debug(
            '| - - - - Creating relationship between parent %s and child %s : %s'
            % (rule_parent, rule_child, rule))

    ############################
    ## Create Dictionary for Grafana
    ############################
    def stats_rules(self, rules):
        ''' Gather general information about total number of rules, and stats. '''
        # 'Rules' excludes carrier datasets; 'AllRules' counts everything.
        # NOTE(review): the if/else branches below are identical except for
        # the lazy dict initialisation on first use.
        RUCIO = dict()
        if rules:
            for rule in rules:
                if 'outdated_replication_dataset' not in rule['name']:
                    if 'Rules' not in RUCIO:
                        RUCIO['Rules'] = {
                            'total_stuck': 0,
                            'total_replicating': 0,
                            'total_ok': 0,
                            'total_rules': 0
                        }
                        RUCIO['Rules']['total_rules'] += 1
                        if rule['state'] == 'REPLICATING':
                            RUCIO['Rules']['total_replicating'] += 1
                        elif rule['state'] == 'STUCK':
                            RUCIO['Rules']['total_stuck'] += 1
                        elif rule['state'] == 'OK':
                            RUCIO['Rules']['total_ok'] += 1
                    else:
                        RUCIO['Rules']['total_rules'] += 1
                        if rule['state'] == 'REPLICATING':
                            RUCIO['Rules']['total_replicating'] += 1
                        elif rule['state'] == 'STUCK':
                            RUCIO['Rules']['total_stuck'] += 1
                        elif rule['state'] == 'OK':
                            RUCIO['Rules']['total_ok'] += 1
                if 'AllRules' not in RUCIO:
                    RUCIO['AllRules'] = {
                        'total_stuck': 0,
                        'total_replicating': 0,
                        'total_ok': 0,
                        'total_rules': 0
                    }
                    RUCIO['AllRules']['total_rules'] += 1
                    if rule['state'] == 'REPLICATING':
                        RUCIO['AllRules']['total_replicating'] += 1
                    elif rule['state'] == 'STUCK':
                        RUCIO['AllRules']['total_stuck'] += 1
                    elif rule['state'] == 'OK':
                        RUCIO['AllRules']['total_ok'] += 1
                else:
                    RUCIO['AllRules']['total_rules'] += 1
                    if rule['state'] == 'REPLICATING':
                        RUCIO['AllRules']['total_replicating'] += 1
                    elif rule['state'] == 'STUCK':
                        RUCIO['AllRules']['total_stuck'] += 1
                    elif rule['state'] == 'OK':
                        RUCIO['AllRules']['total_ok'] += 1
                ##################
                if 'Grouping' not in RUCIO:
                    RUCIO['Grouping'] = {
                        'file': 0,
                        'dataset': 0,
                        'container': 0
                    }
                    if rule['did_type'] == 'CONTAINER':
                        RUCIO['Grouping']['container'] += 1
                    elif rule['did_type'] == 'DATASET':
                        RUCIO['Grouping']['dataset'] += 1
                    elif rule['did_type'] == 'FILE':
                        RUCIO['Grouping']['file'] += 1
                else:
                    if rule['did_type'] == 'CONTAINER':
                        RUCIO['Grouping']['container'] += 1
                    elif rule['did_type'] == 'DATASET':
                        RUCIO['Grouping']['dataset'] += 1
                    elif rule['did_type'] == 'FILE':
                        RUCIO['Grouping']['file'] += 1
        return (RUCIO)

    def stats_replica_rules(self, rules):
        ''' Gather specific information about state and number of replicas. '''
        # Aggregates lock counters per RSE expression.
        REPLICAS = dict()
        REPLICAS['RSE'] = {}
        if rules:
            # Creates a key for all the RSEs that we have replicas
            for rule in rules:
                # if the RSE is not in the dictionary
                #print(rule['rse_expression'], REPLICAS['RSE'])
                if rule['rse_expression'] not in REPLICAS['RSE']:
                    #print(REPLICAS)
                    REPLICAS['RSE'][rule['rse_expression']] = {
                        'total_replica_stuck': rule['locks_stuck_cnt'],
                        'total_replica_replicating':
                        rule['locks_replicating_cnt'],
                        'total_replica_ok': rule['locks_ok_cnt']
                    }
                # else if it is, update replica numbers
                else:
                    REPLICAS['RSE'][rule['rse_expression']][
                        'total_replica_stuck'] += rule['locks_stuck_cnt']
                    REPLICAS['RSE'][rule['rse_expression']][
                        'total_replica_replicating'] += rule[
                            'locks_replicating_cnt']
                    REPLICAS['RSE'][rule['rse_expression']][
                        'total_replica_ok'] += rule['locks_ok_cnt']
        return (REPLICAS)

    def stats_usage_rules(self, all_rses):
        # Per-RSE byte usage for this account; RSEs with zero bytes skipped.
        STORAGE = dict()
        STORAGE['USAGE'] = {}
        for x_rse in all_rses:
            rses = self.usage(x_rse)
            if rses['bytes'] != 0:
                if rses['rse'] not in STORAGE['USAGE']:
                    STORAGE['USAGE'][rses['rse']] = {
                        'total_bytes_used': rses['bytes']
                    }
                # else if it is, update replica numbers
                else:
                    STORAGE['USAGE'][
                        rses['rse']]['total_bytes_used'] += rses['bytes']
        return (STORAGE)
class TestAbacusCollectionReplica():
    """ABACUS: integration test for collection-replica accounting.

    Fix: nose-style ``assert_equal(a, b)`` helpers replaced with plain
    ``assert a == b`` statements (nose is unmaintained; plain asserts are
    what pytest rewrites, and match the sibling unittest variant of this
    class elsewhere in the codebase). Behavior of the checks is unchanged.
    """

    def setUp(self):
        # Fresh clients and a unique dataset name per test run.
        self.account = 'root'
        self.scope = 'mock'
        self.rule_client = RuleClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.upload_client = UploadClient()
        self.file_sizes = 2
        self.dataset = 'dataset_%s' % generate_uuid()
        self.rse = 'MOCK5'
        self.rse_id = get_rse_id(rse=self.rse)

    def tearDown(self):
        # Run the cleanup daemons so test data does not leak between tests.
        undertaker.run(once=True)
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{'did_scope': self.scope,
                       'did_name': 'file_' + generate_uuid(),
                       'path': file_generator(size=self.file_sizes),
                       'rse': self.rse,
                       'lifetime': -1} for i in range(0, 2)]
        self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope, name=self.dataset,
                                    dids=[{'name': file['did_name'], 'scope': file['did_scope']} for file in self.files])
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert dataset_replica['bytes'] == 0
        assert dataset_replica['length'] == 0
        assert dataset_replica['available_bytes'] == 0
        assert dataset_replica['available_length'] == 0
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the
        # collection_replica table from updated_col_rep
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['available_bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files)
        assert str(dataset_replica['state']) == 'AVAILABLE'

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id,
                        files=[{'name': self.files[0]['did_name'],
                                'scope': InternalScope(self.files[0]['did_scope'])}])
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)][0]
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files) - 1
        assert dataset_replica['available_bytes'] == (len(self.files) - 1) * self.file_sizes
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        reaper.run(once=True, rses=[self.rse], greedy=True)
        self.rule_client.add_replication_rule([{'scope': self.scope, 'name': self.dataset}], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [replica for replica in self.replica_client.list_dataset_replicas(self.scope, self.dataset)]
        assert len(dataset_replica) == 0
class TestAbacusCollectionReplica(unittest.TestCase):
    """ABACUS: integration test for collection-replica accounting,
    multi-VO aware (RSE expressions are VO-qualified when multi_vo is on).
    """

    def setUp(self):
        # Fixtures: one dataset with two small files on a single RSE;
        # self.vo carries the VO kwargs when running multi-VO.
        self.account = 'root'
        self.scope = 'mock'
        self.rse = 'MOCK5'
        self.file_sizes = 2
        self.dataset = 'dataset_%s' % generate_uuid()
        self.rule_client = RuleClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.upload_client = UploadClient()
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {
                'vo': config_get('client',
                                 'vo',
                                 raise_exception=False,
                                 default='tst')
            }
        else:
            self.vo = {}
        self.rse_id = get_rse_id(rse=self.rse, **self.vo)

    def tearDown(self):
        # Cleanup daemons; the reaper needs a VO-qualified RSE expression
        # in multi-VO mode.
        undertaker.run(once=True)
        cleaner.run(once=True)
        if self.vo:
            reaper.run(once=True,
                       include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse),
                       greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)

    def test_abacus_collection_replica(self):
        """ ABACUS (COLLECTION REPLICA): Test update of collection replica. """
        self.files = [{
            'did_scope': self.scope,
            'did_name': 'file_' + generate_uuid(),
            'path': file_generator(size=self.file_sizes),
            'rse': self.rse,
            'lifetime': -1
        } for i in range(0, 2)]
        self.did_client.add_did(self.scope, self.dataset, DIDType.DATASET, lifetime=-1)
        self.upload_client.upload(self.files)
        self.did_client.attach_dids(scope=self.scope,
                                    name=self.dataset,
                                    dids=[{
                                        'name': file['did_name'],
                                        'scope': file['did_scope']
                                    } for file in self.files])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }], 1, self.rse, lifetime=-1)
        # Local upload sources are no longer needed once registered.
        [os.remove(file['path']) for file in self.files]

        # Check dataset replica after rule creation - initial data
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == 0
        assert dataset_replica['length'] == 0
        assert dataset_replica['available_bytes'] == 0
        assert dataset_replica['available_length'] == 0
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Run Abacus
        collection_replica.run(once=True)

        # Check dataset replica after abacus - abacus should update the
        # collection_replica table from updated_col_rep
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['available_bytes'] == len(
            self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files)
        assert str(dataset_replica['state']) == 'AVAILABLE'

        # Delete one file -> collection replica should be unavailable
        cleaner.run(once=True)
        delete_replicas(rse_id=self.rse_id,
                        files=[{
                            'name': self.files[0]['did_name'],
                            'scope': InternalScope(self.files[0]['did_scope'],
                                                   **self.vo)
                        }])
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ][0]
        assert dataset_replica['length'] == len(self.files)
        assert dataset_replica['bytes'] == len(self.files) * self.file_sizes
        assert dataset_replica['available_length'] == len(self.files) - 1
        assert dataset_replica['available_bytes'] == (len(self.files) - 1) * self.file_sizes
        assert str(dataset_replica['state']) == 'UNAVAILABLE'

        # Delete all files -> collection replica should be deleted
        cleaner.run(once=True)
        if self.vo:
            reaper.run(once=True,
                       include_rses='vo=%s&(%s)' % (self.vo['vo'], self.rse),
                       greedy=True)
        else:
            reaper.run(once=True, include_rses=self.rse, greedy=True)
        self.rule_client.add_replication_rule([{
            'scope': self.scope,
            'name': self.dataset
        }], 1, self.rse, lifetime=-1)
        collection_replica.run(once=True)
        dataset_replica = [
            replica for replica in self.replica_client.list_dataset_replicas(
                self.scope, self.dataset)
        ]
        assert len(dataset_replica) == 0
filename = did_dict[did]['name'] # filename = name bytes = int(did_dict[did]['size']) did_dict[did]['guid'] = str(generate_uuid()) guid = did_dict[did]['guid'] if args.dry_run: print 'pfn:', pfn print did_dict[did] break else: try: repCli.add_replica(rse_name, scope, filename, bytes, adler32=adler32, pfn=pfn, md5=md5, meta={'guid': guid}) print 'Replica for %s:%s added' % (scope, filename) ruleCli.add_replication_rule(dids=[{'scope': scope, 'name': filename}], copies=1, rse_expression=rse_name, grouping='DATASET') print 'Rule for %s:%s added' % (scope, filename) except exception.Duplicate: print 'Already replicated for %s:%s, but try adding rules' % (scope, filename) # rules = didCli.list_did_rules(scope=scope, name=filename) # rules = didCli.list_associated_rules_for_file(scope=scope, name=filename) try: # no_rules4_RSE = True # for x in rules: # if x['rse_expression'] == rse_name: # no_rules4_RSE = False # print 'Already has a rule for the RSE.' # break # else: # continue # if no_rules4_RSE:
def _rucio_register(beamline, uid, filenames):
    """
    Register the file in rucio for replication to SDCC.

    Fix: removed a leftover ``breakpoint()`` debugger call that would halt
    the registration pipeline waiting for interactive input on every file.

    :param beamline: beamline name, used as the Rucio scope.
    :param uid: unique run id, used as the container name.
    :param filenames: iterable of (root, ending, filename) triples.

    NOTE(review): relies on module-level ``pfn`` and ``rse`` globals —
    confirm they are defined in this module.
    """
    scope = beamline
    container = uid
    replica_client = ReplicaClient()
    didclient = DIDClient()
    scopeclient = ScopeClient()
    ruleclient = RuleClient()
    for root, ending, filename in filenames:
        #size = os.stat(str(filename)).st_size
        #adler = adler32(str(filename))
        # Size/checksum are placeholders; only the PFN and name are real.
        files = [{
            'scope': scope,
            'name': filename.split('/')[-1],
            'bytes': 1000,
            #'adler32': "unknown",
            'pfn': pfn + filename
        }]
        # Dataset name is derived from the path components, dot-joined.
        dataset = os.path.join(root, ending)
        dataset = '.'.join(dataset.split('/')[1:-1])
        print("DATASET", dataset)
        # Make sure the scope exists (idempotent).
        try:
            scopeclient.add_scope(account='nsls2data', scope=scope)
        except rucio.common.exception.Duplicate:
            pass
        replica_client.add_replicas(rse=rse, files=files)
        # Create a new container if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=uid, type='container')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass
        # Create a replication rule on the container (idempotent).
        try:
            dids = [{'scope': scope, 'name': container}]
            ruleclient.add_replication_rule(
                dids=dids,
                copies=1,
                rse_expression='SDCC',
                lifetime=86400,  # Seconds
                account='nsls2data',
                source_replica_expression='NSLS2',
                purge_replicas=True,
                comment='purge_replicas in 24 hours')
        except rucio.common.exception.DuplicateRule:
            pass
        # Create a new dataset if it doesn't exist.
        try:
            didclient.add_did(scope=scope, name=dataset, type='dataset')
        except rucio.common.exception.DataIdentifierAlreadyExists:
            pass
        # Attach the dataset to the run container, and the files to the dataset.
        attachment = {
            'scope': scope,
            'name': uid,
            'dids': [{
                'scope': scope,
                'name': dataset
            }]
        }
        try:
            didclient.add_files_to_dataset(scope, dataset, files)
        except rucio.common.exception.FileAlreadyExists:
            pass
        try:
            didclient.add_datasets_to_containers([attachment])
        except rucio.common.exception.DuplicateContent:
            pass
class TestReplicationRuleClient():
    """Client-level tests for replication rules (add/delete/list/update)
    against the MOCK* test RSEs."""

    @classmethod
    def setUpClass(cls):
        # Add test RSE
        cls.rse1 = 'MOCK'
        cls.rse3 = 'MOCK3'
        cls.rse4 = 'MOCK4'
        cls.rse5 = 'MOCK5'
        cls.rse1_id = get_rse(cls.rse1).id
        cls.rse3_id = get_rse(cls.rse3).id
        cls.rse4_id = get_rse(cls.rse4).id
        cls.rse5_id = get_rse(cls.rse5).id
        # Add Tags
        # T1 groups rse1/rse3/rse5; T2 holds rse4 only.
        cls.T1 = tag_generator()
        cls.T2 = tag_generator()
        add_rse_attribute(cls.rse1, cls.T1, True)
        add_rse_attribute(cls.rse3, cls.T1, True)
        add_rse_attribute(cls.rse4, cls.T2, True)
        add_rse_attribute(cls.rse5, cls.T1, True)
        # Add fake weights
        add_rse_attribute(cls.rse1, "fakeweight", 10)
        add_rse_attribute(cls.rse3, "fakeweight", 0)
        add_rse_attribute(cls.rse4, "fakeweight", 0)
        add_rse_attribute(cls.rse5, "fakeweight", 0)

    def setup(self):
        # Fresh clients per test (nose-style per-test setup).
        self.rule_client = RuleClient()
        self.did_client = DIDClient()
        self.subscription_client = SubscriptionClient()
        self.account_client = AccountClient()
        self.lock_client = LockClient()

    def test_add_rule(self):
        """ REPLICATION RULE (CLIENT): Add a replication rule """
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        ret = self.rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}],
                                                    account='jdoe',
                                                    copies=2,
                                                    rse_expression=self.T1,
                                                    grouping='NONE')
        assert_is_instance(ret, list)

    def test_delete_rule(self):
        """ REPLICATION RULE (CLIENT): Delete a replication rule """
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        rule_id = add_rule(dids=[{'scope': scope, 'name': dataset}],
                           account='jdoe',
                           copies=1,
                           rse_expression=self.rse1,
                           grouping='NONE',
                           weight='fakeweight',
                           lifetime=None,
                           locked=False,
                           subscription_id=None)[0]
        ret = self.rule_client.delete_replication_rule(rule_id=rule_id)
        assert(ret is True)
        # Deleting again must fail: the rule is gone.
        assert_raises(RuleNotFound, self.rule_client.delete_replication_rule, rule_id)

    def test_list_rules_by_did(self):
        """ DID (CLIENT): List Replication Rules per DID """
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                             account='jdoe',
                             copies=1,
                             rse_expression=self.rse1,
                             grouping='NONE',
                             weight='fakeweight',
                             lifetime=None,
                             locked=False,
                             subscription_id=None)[0]
        rule_id_2 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                             account='jdoe',
                             copies=1,
                             rse_expression=self.rse3,
                             grouping='NONE',
                             weight='fakeweight',
                             lifetime=None,
                             locked=False,
                             subscription_id=None)[0]
        ret = self.did_client.list_did_rules(scope=scope, name=dataset)
        ids = [rule['id'] for rule in ret]
        assert_in(rule_id_1, ids)
        assert_in(rule_id_2, ids)

    def test_get_rule(self):
        """ REPLICATION RULE (CLIENT): Get Replication Rule by id """
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        ret = self.rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}],
                                                    account='jdoe',
                                                    copies=2,
                                                    rse_expression=self.T1,
                                                    grouping='NONE')
        get = self.rule_client.get_replication_rule(ret[0])
        assert(ret[0] == get['id'])

    def test_get_rule_by_account(self):
        """ ACCOUNT (CLIENT): Get Replication Rule by account """
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        ret = self.rule_client.add_replication_rule(dids=[{'scope': scope, 'name': dataset}],
                                                    account='jdoe',
                                                    copies=2,
                                                    rse_expression=self.T1,
                                                    grouping='NONE')
        get = self.account_client.list_account_rules('jdoe')
        rules = [rule['id'] for rule in get]
        assert_in(ret[0], rules)

    def test_locked_rule(self):
        """ REPLICATION RULE (CLIENT): Delete a locked replication rule"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                             account='jdoe',
                             copies=1,
                             rse_expression=self.rse1,
                             grouping='NONE',
                             weight='fakeweight',
                             lifetime=None,
                             locked=True,
                             subscription_id=None)[0]
        # A locked rule cannot be deleted until it is unlocked.
        assert_raises(AccessDenied, delete_rule, rule_id_1)
        self.rule_client.update_replication_rule(rule_id=rule_id_1, options={'locked': False})
        delete_rule(rule_id=rule_id_1)

    def test_dataset_lock(self):
        """ DATASETLOCK (CLIENT): Get a datasetlock for a specific dataset"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                             account='jdoe',
                             copies=1,
                             rse_expression=self.rse1,
                             grouping='DATASET',
                             weight='fakeweight',
                             lifetime=None,
                             locked=True,
                             subscription_id=None)[0]
        rule_ids = [lock['rule_id'] for lock in self.lock_client.get_dataset_locks(scope=scope, name=dataset)]
        assert_in(rule_id_1, rule_ids)

    def test_change_rule_lifetime(self):
        """ REPLICATION RULE (CLIENT): Change rule lifetime"""
        scope = 'mock'
        files = create_files(3, scope, self.rse1)
        dataset = 'dataset_' + str(uuid())
        add_did(scope, dataset, DIDType.from_sym('DATASET'), 'jdoe')
        attach_dids(scope, dataset, files, 'jdoe')
        rule_id_1 = add_rule(dids=[{'scope': scope, 'name': dataset}],
                             account='jdoe',
                             copies=1,
                             rse_expression=self.rse1,
                             grouping='DATASET',
                             weight='fakeweight',
                             lifetime=150,
                             locked=True,
                             subscription_id=None)[0]
        get = self.rule_client.get_replication_rule(rule_id_1)
        # Bumping the lifetime must move the expiry timestamp.
        self.rule_client.update_replication_rule(rule_id_1, options={'lifetime': 10000})
        get2 = self.rule_client.get_replication_rule(rule_id_1)
        assert(get['expires_at'] != get2['expires_at'])