def getReplicaInfoForBlocks(self, scope='cms', dataset=None, block=None, node=None):  # Mirroring PhEDEx service
    """
    _blockreplicas_

    Get replicas for given blocks, mirroring the PhEDEx blockreplicas API.

    :param scope: Rucio scope to query (default 'cms').
    :param dataset: dataset (container) name; may be a single name or a
        list/set of names. Blocks are resolved via cmsBlocksInContainer.
    :param block: block name; may be a single name or a list/set of names.
    :param node: node name filter. NOTE(review): currently unused — kept
        only for PhEDEx API compatibility; all nodes holding AVAILABLE
        replicas are returned regardless of this value.
    :returns: dict of the form {'block': [{block_name: [node, ...]}, ...]}
    """
    block_names = []
    result = {'block': []}

    rc = ReplicaClient(account=self.account, auth_type=self.auth_type)

    # Normalize the 'block' argument to a list of block names.
    if isinstance(block, (list, set)):
        block_names = list(block)
    elif block:
        block_names = [block]

    # Expand dataset(s) into their constituent blocks.
    if isinstance(dataset, (list, set)):
        for dataset_name in dataset:
            block_names.extend(
                self.cmsBlocksInContainer(dataset_name, scope=scope))
    elif dataset:
        block_names.extend(self.cmsBlocksInContainer(dataset, scope=scope))

    for block_name in block_names:
        dids = [{'scope': scope, 'name': block_name}]
        response = rc.list_replicas(dids=dids)
        nodes = set()
        for item in response:
            # BUG FIX: the original loop variable was named 'node', which
            # shadowed (and destroyed) the caller's 'node' parameter.
            for node_name, state in item['states'].items():
                if state.upper() == 'AVAILABLE':
                    nodes.add(node_name)
        result['block'].append({block_name: list(nodes)})
    return result
def __lfns2pfns_client(self, lfns):
    """
    Provides the path of a replica for non-deterministic sites.
    Will be assigned to get path by the __init__ method if necessary.

    :param lfns: a single DID dict ({'scope': ..., 'name': ...}) or a list
        of such dicts.
    :returns: dict with scope:name as keys and PFN as value
        (in case of errors the Rucio exception is assigned to the key)
    """
    client = ReplicaClient()
    pfns = {}

    lfns = [lfns] if isinstance(lfns, dict) else lfns
    for lfn in lfns:
        scope = lfn['scope']
        name = lfn['name']
        # schemes is used to narrow down the response message.
        replicas = [r for r in client.list_replicas([{'scope': scope, 'name': name}, ],
                                                    schemes=[self.attributes['scheme'], ])]
        key = '%s:%s' % (scope, name)
        if len(replicas) > 1:
            # BUG FIX: the original assigned this error and then fell
            # through, overwriting it with replicas[0] below.
            pfns[key] = exception.RSEOperationNotSupported('This operation can only be performed for files.')
        elif not replicas:
            # BUG FIX: the original fell through to replicas[0] here and
            # raised IndexError instead of reporting "File not found".
            pfns[key] = exception.RSEOperationNotSupported('File not found.')
        else:
            pfns[key] = replicas[0]['rses'][self.rse['rse']][0] if (self.rse['rse'] in replicas[0]['rses'].keys()) else exception.RSEOperationNotSupported('Replica not found on given RSE.')
    return pfns
def __lfns2pfns_client(self, lfns):
    """
    Provides the path of a replica for non-deterministic sites.
    Will be assigned to get path by the __init__ method if necessary.

    :param lfns: a single DID dict ({'scope': ..., 'name': ...}) or a list
        of such dicts.
    :returns: dict with scope:name as keys and PFN as value
        (in case of errors the Rucio exception is assigned to the key)
    """
    client = ReplicaClient()
    pfns = {}

    lfns = [lfns] if isinstance(lfns, dict) else lfns
    for lfn in lfns:
        scope = lfn['scope']
        name = lfn['name']
        # schemes is used to narrow down the response message.
        replicas = [r for r in client.list_replicas([{'scope': scope, 'name': name}, ],
                                                    schemes=[self.attributes['scheme'], ])]
        key = '%s:%s' % (scope, name)
        if len(replicas) > 1:
            # BUG FIX: the original assigned this error and then fell
            # through, overwriting it with replicas[0] below.
            pfns[key] = exception.RSEOperationNotSupported('This operation can only be performed for files.')
        elif not replicas:
            # BUG FIX: the original fell through to replicas[0] here and
            # raised IndexError instead of reporting "File not found".
            pfns[key] = exception.RSEOperationNotSupported('File not found.')
        else:
            pfns[key] = replicas[0]['rses'][self.rse['rse']][0] if (self.rse['rse'] in replicas[0]['rses'].keys()) else exception.RSEOperationNotSupported('Replica not found on given RSE.')
    return pfns
class ShowRun():
    """Diagnostic tool that prints the status of XENON runs.

    For each selected run it reports DB status, EventBuilder state,
    on-disk file counts, and Rucio replica/rule information, and prints
    warnings (with manual admix-fix hints) when the three sources disagree.
    All output goes to stdout; nothing is modified.
    """

    def __init__(self):
        # Take all data type categories from the host configuration.
        self.RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['raw_records_tpc_types']
        self.RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
        self.RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
        self.LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['light_raw_records_tpc_types']
        self.LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['light_raw_records_mv_types']
        self.LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['light_raw_records_nv_types']
        self.HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
        self.RECORDS_TYPES = helper.get_hostconfig()['records_types']
        # Choose which data types you want to treat (union of all categories).
        self.DTYPES = self.RAW_RECORDS_TPC_TYPES + self.RAW_RECORDS_MV_TYPES + self.RAW_RECORDS_NV_TYPES + self.LIGHT_RAW_RECORDS_TPC_TYPES + self.LIGHT_RAW_RECORDS_MV_TYPES + self.LIGHT_RAW_RECORDS_NV_TYPES + self.HIGH_LEVEL_TYPES + self.RECORDS_TYPES
        # Take the list of all XENON RSEs.
        self.RSES = helper.get_hostconfig()['rses']
        # Take the RSE that is used to perform the upload.
        self.UPLOAD_TO = helper.get_hostconfig()['upload_to']
        # Take the directory where the data manager has to upload data.
        self.DATADIR = helper.get_hostconfig()['path_data_to_upload']
        # Get the sequence of rules to be created according to the data type.
        self.RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["raw_records_tpc_rses"]
        self.RAW_RECORDS_MV_RSES = helper.get_hostconfig()["raw_records_mv_rses"]
        self.RAW_RECORDS_NV_RSES = helper.get_hostconfig()["raw_records_nv_rses"]
        self.LIGHT_RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["light_raw_records_tpc_rses"]
        self.LIGHT_RAW_RECORDS_MV_RSES = helper.get_hostconfig()["light_raw_records_mv_rses"]
        self.LIGHT_RAW_RECORDS_NV_RSES = helper.get_hostconfig()["light_raw_records_nv_rses"]
        self.HIGH_LEVEL_RSES = helper.get_hostconfig()["high_level_rses"]
        self.RECORDS_RSES = helper.get_hostconfig()["records_rses"]
        # Init the runDB.
        self.db = ConnectMongoDB()
        # Init Rucio for later uploads and handling:
        self.rc = RucioSummoner()
        self.didclient = DIDClient()
        self.replicaclient = ReplicaClient()
        # Rucio rule assignment priority.
        self.priority = 3
        # Parameters used to decide when warnings are printed.
        self.minimum_number_acceptable_rses = 2
        self.minimum_deltadays_allowed = 3

    def showrun(self, arg_number, arg_to, arg_dtypes, arg_compact,
                arg_dumpjson, arg_status, arg_latest, arg_pending):
        """Legacy per-run report (superseded by showrun_new).

        :param arg_number: run number as string, or a DID, or "" (latest runs).
        :param arg_to: last run number when reporting a range.
        :param arg_dtypes: list of data types to restrict the report to.
        :param arg_compact: if truthy, print only the run-level summary.
        :param arg_dumpjson: if truthy, dump the full rundoc as JSON.
        :param arg_status: if truthy, print only run status and skip details.
        :param arg_latest: number of most recent runs to process (0 = off).
        :param arg_pending: if truthy, skip data already transferred.
        """
        # Define data types (re-read from host config, independent of __init__).
        RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['raw_records_tpc_types']
        RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
        RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
        LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['light_raw_records_tpc_types']
        LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['light_raw_records_mv_types']
        LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['light_raw_records_nv_types']
        HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
        RECORDS_TYPES = helper.get_hostconfig()['records_types']
        # Get other parameters.
        DATADIR = helper.get_hostconfig()['path_data_to_upload']
        RSES = helper.get_hostconfig()['rses']
        minimum_number_acceptable_rses = 2
        minimum_deltadays_allowed = 3
        # Storing some backup hashes in case DID information is not available.
        bkp_hashes = {
            'raw_records': 'rfzvpzj4mf',
            'raw_records_he': 'rfzvpzj4mf',
            'raw_records_mv': 'rfzvpzj4mf',
            'raw_records_aqmon': 'rfzvpzj4mf',
            'records': '56ausr64s7',
            'lone_hits': 'b7dgmtzaef'
        }
        context = 'xenonnt_online'
        # Init the runDB.
        db = ConnectMongoDB()
        # Init Rucio for later uploads and handling:
        rc = RucioSummoner(helper.get_hostconfig("rucio_backend"))
        rc.SetRucioAccount(helper.get_hostconfig('rucio_account'))
        rc.SetConfigPath(helper.get_hostconfig("rucio_cli"))
        rc.SetProxyTicket(helper.get_hostconfig('rucio_x509'))
        rc.SetHost(helper.get_hostconfig('host'))
        rc.ConfigHost()
        # NOTE(review): this overwrites the proxy ticket set above with the
        # literal string "rucio_x509" rather than the configured path —
        # looks like a bug; confirm against RucioSummoner's expectations.
        rc.SetProxyTicket("rucio_x509")
        data_types = RAW_RECORDS_TPC_TYPES + RAW_RECORDS_MV_TYPES + RAW_RECORDS_NV_TYPES + LIGHT_RAW_RECORDS_TPC_TYPES + LIGHT_RAW_RECORDS_MV_TYPES + LIGHT_RAW_RECORDS_NV_TYPES + HIGH_LEVEL_TYPES + RECORDS_TYPES

        # If arg_number has been given:
        if arg_number != "":
            # if the "number" argument is a number, it is converted as integer
            if arg_number.isdigit():
                arg_number = int(arg_number)
            # otherwise it is assumed that a DID has been given and run number
            # and other parameters are extracted from the DID
            else:
                arg_number, dtype, hash = get_did(arg_number)
                arg_dtypes = [dtype]
        # If no arg_number has been given, then the "latest" option is
        # activated (with 5 run numbers by default) in compact modality.
        else:
            if arg_latest == 0:
                arg_latest = 5
                arg_compact = True

        if arg_latest > 0:
            # Find the most recent run number and derive the range start.
            cursor = db.db.find({}).sort('number', pymongo.DESCENDING).limit(1)
            cursor = list(cursor)
            arg_to = cursor[0]['number']
            arg_number = arg_to - arg_latest + 1
            print('Processing latest {0} runs'.format(arg_latest))

        if arg_to > arg_number:
            # Query a run range [arg_number, arg_to].
            cursor = db.db.find({
                'number': {
                    '$gte': arg_number,
                    '$lte': arg_to
                }
            }).sort('number', pymongo.ASCENDING)
            print('Runs that will be processed are from {0} to {1}'.format(arg_number, arg_to))
        else:
            cursor = db.db.find({'number': arg_number})
            print('Run that will be processed is {0}'.format(arg_number))
        cursor = list(cursor)

        # Runs over all listed runs.
        for run in cursor:
            print("")
            # Gets run number.
            number = run['number']
            print('Run: {0}'.format(number))
            # Gets the status.
            if 'status' in run:
                print('Status: {0}'.format(run['status']))
            else:
                print('Status: {0}'.format('Not available'))
            if arg_status:
                continue
            # Extracts the correct EventBuilder machine who processed this run,
            # then the bootstrax state and, in case it was abandoned, the reason.
            if 'bootstrax' in run:
                bootstrax = run['bootstrax']
                eb = bootstrax['host'].split('.')[0]
                print('Processed by: {0}'.format(eb))
                if 'state' in bootstrax:
                    print('Bootstrax state: {0}'.format(bootstrax['state']))
                    if bootstrax['state'] == 'abandoned':
                        if 'reason' in bootstrax:
                            print('Reason: {0}'.format(bootstrax['reason']))
            else:
                print('Not processed')
            # Gets the date.
            if 'start' in run:
                start_time = run['start'].replace(tzinfo=timezone.utc)
                print("Date: ", start_time.astimezone(tz=None))
                # Calculates the duration.
                if 'end' in run:
                    if run['end'] is not None:
                        end_time = run['end'].replace(tzinfo=timezone.utc)
                        duration = end_time - start_time
                        print("Duration: ", duration)
                    else:
                        print("Duration: ", "unknown")
                # Prints if run is still recent enough (three days from now).
                now_time = datetime.now().replace(tzinfo=timezone.utc)
                delta_time = now_time - start_time
                if delta_time < timedelta(days=minimum_deltadays_allowed):
                    print("Less than {0} days old".format(minimum_deltadays_allowed))
            else:
                print("Warning : no time info available")
            # Gets the comments.
            if 'comments' in run:
                if len(run['comments']) > 0:
                    last_comment = run['comments'][-1]
                    print("Latest comment ({0}): {1}".format(last_comment['user'], last_comment['comment']))
            # Dumps the entire rundoc under json format.
            if arg_dumpjson:
                print(dumps(run, indent=4))
            if arg_compact:
                continue
            # Merges data and deleted_data
            #            if 'deleted_data' in run:
            #                data = run['data'] + run['deleted_data']
            #            else:
            data = run['data']
            # Check if there are more instances in more EventBuilders.
            extra_ebs = set()
            for d in data:
                if 'eb' in d['host'] and eb not in d['host']:
                    extra_ebs.add(d['host'].split('.')[0])
            if len(extra_ebs) > 0:
                print('\t\t Warning : The run has been processed by more than one EventBuilder: {0}'.format(extra_ebs))
            # Runs over all data types to be monitored.
            for dtype in data_types:
                if len(arg_dtypes) > 0:
                    if dtype not in arg_dtypes:
                        continue
                # Take the official number of files according to run DB
                # and the EB status.
                Nfiles = -1
                ebstatus = ""
                for d in data:
                    if d['type'] == dtype and eb in d['host']:
                        if 'file_count' in d:
                            Nfiles = d['file_count']
                        if 'status' in d:
                            ebstatus = d['status']
                if arg_pending:
                    if ebstatus in ["", "transferred"]:
                        continue
                # Data type name.
                print('{0}'.format(dtype))
                if Nfiles == -1:
                    print('\t Number of files: missing in DB')
                else:
                    print('\t Number of files: {0}'.format(Nfiles))
                if ebstatus != "":
                    print('\t EB status: {0}'.format(ebstatus))
                else:
                    print('\t EB status: not available')
                # Check if data are still in the data list and not in deleted_data.
                DB_InEB = False
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        DB_InEB = True
                DB_NotInEB = False
                if 'deleted_data' in run:
                    for d in run['deleted_data']:
                        if d['type'] == dtype and eb in d['host']:
                            DB_NotInEB = True
                if DB_InEB and not DB_NotInEB:
                    print('\t DB : still in EB')
                if not DB_InEB and DB_NotInEB:
                    print('\t DB : deleted from EB')
                if DB_InEB and DB_NotInEB:
                    print('\t\t Incoherency in DB: it is both in data list and in deleted_data list')
                #if (DB_InEB and DB_NotInEB) or (not DB_InEB and not DB_NotInEB):
                #    print('\t\t incoherency in DB: it is neither in data list nor in deleted_data list')
                # Check if data are still in the EB disks without using the DB.
                upload_path = ""
                for d in run['data']:
                    if d['type'] == dtype and eb in d['host']:
                        file = d['location'].split('/')[-1]
                        upload_path = os.path.join(DATADIR, eb, file)
                path_exists = os.path.exists(upload_path)
                if upload_path != "" and path_exists:
                    path, dirs, files = next(os.walk(upload_path))
                    print('\t Disk: still in EB disk and with', len(files), 'files')
                else:
                    print('\t Disk: not in EB disk')
                if DB_InEB and not path_exists:
                    print('\t\t Incoherency in DB and disk: it is in DB data list but it is not in the disk')
                if DB_NotInEB and path_exists:
                    print('\t\t Incoherency in DB and disk: it is in DB deleted_data list but it is still in the disk')
                # The list of DIDs (usually just one).
                dids = set()
                for d in data:
                    if d['type'] == dtype and d['host'] == 'rucio-catalogue':
                        if 'did' in d:
                            dids.add(d['did'])
                print('\t DID:', dids)
                # Check the presence in each available RSE.
                Nrses = 0
                for rse in RSES:
                    is_in_rse = False
                    for d in run['data']:
                        if d['type'] == dtype and rse in d['location']:
                            if 'status' in d:
                                status = d['status']
                            else:
                                status = 'Not available'
                            if 'did' in d:
                                hash = d['did'].split('-')[-1]
                                did = d['did']
                            else:
                                print('\t\t Warning : DID information is absent in DB data list (old admix version). Using standard hashes for RSEs')
                                #hash = bkp_hashes.get(dtype)
                                #hash = utilix.db.get_hash(context, dtype)
                                hash = db.GetHashByContext(context, dtype)
                                did = make_did(number, dtype, hash)
                            rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                            files = list_file_replicas(number, dtype, hash, rse)
                            if rucio_rule['exists']:
                                print('\t', rse + ': DB Yes, Status', status, ', Rucio Yes, State', rucio_rule['state'], ",", len(files), 'files')
                                if len(files) < Nfiles and rucio_rule['state'] != "REPLICATING":
                                    print('\t\t Warning : Wrong number of files in Rucio!!!')
                            else:
                                print('\t', rse + ': DB Yes, Status', status, ', Rucio No')
                            # print(files)
                            is_in_rse = True
                            Nrses += 1
                    if not is_in_rse:
                        # DB has no entry for this RSE: guess the DID and
                        # query Rucio anyway.
                        # print('\t\t Warning : data information is absent in DB data list. Trying using standard hashes to query Rucio')
                        # hash = bkp_hashes.get(dtype)
                        #hash = utilix.db.get_hash(context, dtype)
                        hash = db.GetHashByContext(context, dtype)
                        did = make_did(number, dtype, hash)
                        print('\t Guessed DID:', did)
                        rucio_rule = rc.GetRule(upload_structure=did, rse=rse)
                        files = list_file_replicas(number, dtype, hash, rse)
                        if rucio_rule['exists']:
                            print('\t', rse + ': DB No, Rucio Yes, State', rucio_rule['state'], ",", len(files), 'files')
                            if len(files) < Nfiles and rucio_rule['state'] != "REPLICATING":
                                print('\t\t Warning : Wrong number of files in Rucio!!!')
                        else:
                            print('\t', rse + ': DB No, Rucio No')
                print('\t Number of sites: ', Nrses)

    def showrun_new(self, arg_number, arg_to, arg_dtypes, arg_compact,
                    arg_dumpjson, arg_status, arg_latest, arg_pending):
        """Per-run report using the instance's db/rc clients.

        Same argument semantics as showrun; delegates the per-dataset
        analysis to showdataset for every datum processed by this run's
        EventBuilder.
        """
        # If arg_number has been given:
        if arg_number != "":
            # if the "number" argument is a number, it is converted as integer
            if arg_number.isdigit():
                arg_number = int(arg_number)
            # otherwise it is assumed that a DID has been given and run number
            # and other parameters are extracted from the DID
            else:
                arg_number, dtype, hash = get_did(arg_number)
                arg_dtypes = [dtype]
        # If no arg_number has been given, then the "latest" option is
        # activated (with 5 run numbers by default) in compact modality.
        else:
            if arg_latest == 0:
                arg_latest = 5
                arg_compact = True

        if arg_latest > 0:
            # Find the most recent run number and derive the range start.
            cursor = self.db.db.find({}).sort('number', pymongo.DESCENDING).limit(1)
            cursor = list(cursor)
            arg_to = cursor[0]['number']
            arg_number = arg_to - arg_latest + 1
            print('Processing latest {0} runs'.format(arg_latest))

        if arg_to > arg_number:
            # Query a run range [arg_number, arg_to].
            cursor = self.db.db.find({
                'number': {
                    '$gte': arg_number,
                    '$lte': arg_to
                }
            }).sort('number', pymongo.ASCENDING)
            print('Runs that will be processed are from {0} to {1}'.format(arg_number, arg_to))
        else:
            cursor = self.db.db.find({'number': arg_number})
            print('Run that will be processed is {0}'.format(arg_number))
        cursor = list(cursor)

        # Runs over all listed runs.
        for run in cursor:
            print("")
            # Gets run number.
            number = run['number']
            print('Run: {0}'.format(number))
            # Gets the status.
            if 'status' in run:
                print('Status: {0}'.format(run['status']))
            else:
                print('Status: {0}'.format('Not available'))
            if arg_status:
                continue
            # Extracts the correct EventBuilder machine who processed this run,
            # then the bootstrax state and, in case it was abandoned, the reason.
            if 'bootstrax' in run:
                bootstrax = run['bootstrax']
                eb = bootstrax['host'].split('.')[0]
                print('Processed by: {0}'.format(eb))
                if 'state' in bootstrax:
                    print('Bootstrax state: {0}'.format(bootstrax['state']))
                    if bootstrax['state'] == 'abandoned':
                        if 'reason' in bootstrax:
                            print('Reason: {0}'.format(bootstrax['reason']))
            else:
                print('Not processed by EB')
            # Gets the date.
            if 'start' in run:
                start_time = run['start'].replace(tzinfo=timezone.utc)
                print("Date: ", start_time.astimezone(tz=None))
                # Calculates the duration.
                if 'end' in run:
                    if run['end'] is not None:
                        end_time = run['end'].replace(tzinfo=timezone.utc)
                        duration = end_time - start_time
                        print("Duration: ", duration)
                    else:
                        print("Duration: ", "unknown")
                # Prints if run is still recent enough (three days from now).
                now_time = datetime.now().replace(tzinfo=timezone.utc)
                delta_time = now_time - start_time
                if delta_time < timedelta(days=self.minimum_deltadays_allowed):
                    print("Less than {0} days old".format(self.minimum_deltadays_allowed))
            else:
                print("Warning : no time info available")
            # Gets the comments.
            if 'comments' in run:
                if len(run['comments']) > 0:
                    last_comment = run['comments'][-1]
                    print("Latest comment ({0}): {1}".format(last_comment['user'], last_comment['comment']))
            # Dumps the entire rundoc under json format.
            if arg_dumpjson:
                print(dumps(run, indent=4))
            if arg_compact:
                continue
            # Runs over all data types stored in data and deleted_data fields.
            alldata = run['data']
            if 'deleted_data' in run:
                alldata = alldata + run['deleted_data']
            for datum in alldata:
                if arg_pending:
                    ebstatus = ""
                    if 'status' in datum:
                        ebstatus = datum['status']
                    if ebstatus in ["", "transferred"]:
                        continue
                if len(arg_dtypes) > 0:
                    if datum['type'] not in arg_dtypes:
                        continue
                if eb in datum['host']:
                    self.showdataset(run, datum)

    def showdataset(self, run, datum):
        """Report the status of a single dataset (datum) of a run.

        Compares three sources — the run DB entry, the files on the EB
        disk, and the Rucio catalogue — and prints warnings with manual
        admix-fix hints for every known inconsistency pattern.

        :param run: rundoc dict from the runDB.
        :param datum: one entry of run['data'] / run['deleted_data'].
        """
        #print(dumps(datum, indent=4))
        # Skip dataset if it does not have location.
        if 'location' not in datum:
            print('Dataset: type {0} without location. Skipping'.format(datum['type']))
            return
        # Dataset name.
        number = run['number']
        dtype = datum['type']
        hash = datum['location'].split('-')[-1]
        did = make_did(number, dtype, hash)
        print('Dataset: {0}'.format(did))
        # Event builder who treated it.
        eb = datum['host'].split('.')[0]
        # Directory name.
        directory = datum['location'].split('/')[-1]
        # Take the official number of files according to run DB.
        Nfiles = -1
        if 'file_count' in datum:
            Nfiles = datum['file_count']
        if Nfiles == -1:
            print('\t Number of files: missing in DB')
        else:
            print('\t Number of files: {0}'.format(Nfiles))
        # Take the status of the EB dataset according to run DB.
        ebstatus = ""
        if 'status' in datum:
            ebstatus = datum['status']
        if ebstatus != "":
            print('\t EB status: {0}'.format(ebstatus))
        else:
            print('\t EB status: not available')
        # Check if there are double entries in the DB.
        Copies = 0
        for d in run['data']:
            if d['type'] == dtype and eb in d['host'] and hash in d['location']:
                Copies = Copies + 1
        if Copies > 1:
            print('\t\t Warning {0}: EB datum has a double entry in the DB'.format(did))
        # Check if there are other entries in the deleted_data (even with different EBs)
        #DeletedCopies = []
        #for d in run['deleted_data']:
        #    if d['type'] == dtype and hash in d['location']:
        #        DeletedCopies.append(d['host'].split('.')[0])
        #if len(DeletedCopies)>0:
        #    print('\t Previously deleted data processed with those EBs: {0}'.format(DeletedCopies))
        # Read the real number of files present in EB disks.
        upload_path = os.path.join(self.DATADIR, eb, directory)
        path_exists = os.path.exists(upload_path)
        Nfiles_disk = 0
        if path_exists:
            path, dirs, files = next(os.walk(upload_path))
            Nfiles_disk = len(files)
        # If data are supposed to be (according to DB) still present in EB,
        # check if they are there.
        if datum in run['data']:
            print('\t Still in EB')
            if Nfiles_disk != Nfiles:
                print('\t\t Warning {0}: number of files in EB disk ({1}) does not match with the DB info ({2})'.format(did, Nfiles_disk, Nfiles))
        # Otherwise, if data are supposed to be (according to DB) deleted,
        # check if they are really absent.
        elif datum in run['deleted_data']:
            print('\t Deleted from EB')
            if Nfiles_disk > 0:
                print('\t\t Warning {0}: files are still in EB disk (nfiles={1}) while DB says they are deleted '.format(did, Nfiles_disk))
        # Query Rucio to see how many RSEs have those data.
        rules = list(self.didclient.list_did_rules(did.split(':')[0], did.split(':')[1]))
        rses_with_data = []
        for rule in rules:
            rses_with_data.append(rule['rse_expression'])
        if len(rses_with_data) > 0:
            print('\t Rucio replicas in {0} RSEs : {1}'.format(len(rses_with_data), rses_with_data))
        else:
            print('\t No replicas in Rucio')
        # Check the presence of data in each available RSE and compatibility with DB.
        # Step 1: prepare the dictionary.
        rses = []
        for rse in self.RSES:
            r = {}
            r['name'] = rse
            rses.append(r)
        Nrses = 0
        # Step 2: filling the dictionary with RSEs info from DB and from Rucio.
        # NOTE(review): is_in_rse and Nrses are set here but never used or
        # printed in this method — leftovers from showrun?
        for rse in rses:
            is_in_rse = False
            # Get info available in the DB.
            rse['DBentries'] = 0
            rse['DBStatus'] = ""
            for d in run['data']:
                if 'rucio' in d['host']:
                    if d['did'] == did and rse['name'] in d['location']:
                        if 'status' in d:
                            rse['DBStatus'] = d['status']
                        rse['DBentries'] = rse['DBentries'] + 1
            # Get info available in Rucio.
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse['name'])
            # files = list_file_replicas(number, dtype, hash, rse['name'])
            # files = list(self.rc.ListFileReplicas(did,rse['name'],localpath=True).values())
            did_dictionary = [{
                'scope': did.split(':')[0],
                'name': did.split(':')[1]
            }]
            replicas = list(self.replicaclient.list_replicas(did_dictionary, rse_expression=rse['name']))
            #print(dumps(replicas, indent=4))
            rse['RucioExists'] = rucio_rule['exists']
            rse['RucioNFiles'] = len(replicas)
        # Step 3: analysis of data.
        for rse in rses:
            #print(rse)
            # Analysis specific for uploading.
            if rse['name'] == self.UPLOAD_TO:
                # Case 1 : loss of Rucio connection at the end of the upload
                # before creating the rule.
                if rse['RucioNFiles'] == Nfiles and not rse['RucioExists'] and rse['DBStatus'] == "" and rse['DBentries'] == 0 and len(rses_with_data) == 0:
                    print('\t\t Warning: files have been uploaded but the rule has not been created')
                    print('\t\t Hint: create the rule manually, then continue uploading, using the following three commands:')
                    print('\t\t\t rucio add-rule {0} 1 {1}'.format(did, rse['name']))
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                    # os.system('rucio add-rule {0} 1 {1}'.format(did,rse['name']))
                    # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))
                # Case 2 : loss of Rucio connection at the end of the upload
                # before updating the DB.
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse['DBStatus'] == "" and rse['DBentries'] == 0 and len(rses_with_data) == 1:
                    print('\t\t Warning: the upload is completed, but DB needs to be updated and rules have to be created abroad')
                    print('\t\t Hint: fix it manually with the two commands:')
                    print('\t\t\t admix-fix --fix_upload_db {0}'.format(did))
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --fix_upload_db {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))
                # Case 3 : loss of Rucio connection at the end of the upload
                # before creating the rules abroad.
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse['DBStatus'] == "transferred" and rse['DBentries'] == 1 and len(rses_with_data) == 1:
                    print('\t\t Warning: the upload is completed and the DB updated, but rules have to be created abroad')
                    print('\t\t Hint: fix it manually with the command:')
                    print('\t\t\t admix-fix --create_upload_rules {0}'.format(did))
                    # os.system('~/.local/bin/admix-fix --create_upload_rules {0}'.format(did))
                # Case 4 : data still to be uploaded but the value of the EB
                # status is not empty so admix cannot upload it.
                if rse['RucioNFiles'] == 0 and not rse['RucioExists'] and rse['DBStatus'] == "" and rse['DBentries'] == 0 and len(rses_with_data) == 0 and ebstatus not in ["", "transferred"]:
                    print('\t\t Warning: the upload never started but the EB status is not empty, hence admix cannot upload it')
                    print('\t\t Hint: fix it manually with the following command to allow admix upload manager to take care of it:')
                    print('\t\t\t admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))
                    # os.system('~/.local/bin/admix-fix --set_eb_status {0} eb_ready_to_upload'.format(did))
                # NOTE(review): label "Case 4" is duplicated; this branch
                # actually handles a completed upload whose EB status was
                # never flagged as transferred.
                if rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse['DBStatus'] == "transferred" and rse['DBentries'] == 1 and len(rses_with_data) > 0 and ebstatus not in ["", "transferred"]:
                    print('\t\t Warning: the upload is completed and there are also copies abroad')
                    print('\t\t Hint: fix it manually with the command below to flag the EB datum as transferred:')
                    print('\t\t\t admix-fix --set_eb_status {0} transferred'.format(did))
                    # os.system('~/.local/bin/admix-fix --set_eb_status {0} transferred'.format(did))
                # Case 5 : the upload has been interrupted during the copy.
                if rse['RucioNFiles'] != Nfiles and rse['RucioExists'] and rse['DBStatus'] == "" and rse['DBentries'] == 0 and len(rses_with_data) == 1 and ebstatus == "transferring":
                    print('\t\t Warning: the upload has been interrupted during the copy')
                    print('\t\t Hint: fix it manually with the command below to resume the upload:')
                    print('\t\t\t admix-fix --fix_upload {0}'.format(did))
            # Analysis for all RSEs other than datamanager.
            else:
                if not ((rse['RucioNFiles'] == Nfiles and rse['RucioExists'] and rse['DBentries'] == 1 and rse['DBStatus'] == 'transferred') or (rse['RucioNFiles'] == 0 and not rse['RucioExists'] and rse['DBentries'] == 0 and rse['DBStatus'] != 'transferred')):
                    print('\t\t Warning {0}: data in RSE {1} are inconsistent:'.format(did, rse['name']))
                    print('\t\t ', rse)
class TestReplicaClients:
    """Client-level tests for replica handling against the MOCK* test RSEs:
    bad/suspicious declarations, temporary unavailability and tombstones."""

    def setup(self):
        # nose-style per-test setup: fresh clients for every test.
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5

        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                                                         schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Every declared PFN must now show up as a bad replica on MOCK.
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Run necromancer once so the bad replicas are processed (files declared lost).
        necromancer_run(threads=1, bulk=10000, once=True)

        # Attaching a lost file to a dataset must be refused.
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK')

        # Adding replicas to non-deterministic RSE (explicit PFN required)
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
                  'meta': {'events': 10}} for _ in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                                                         schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)

        # Declaring unknown PFNs bad is reported back per RSE.
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_bad_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5

        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                                                         schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
                  'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)

        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                                                         schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})

        # Now adding non-existing bad replicas
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_suspicious_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
        result = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
        assert_equal(result.status, 200)
        token = str(result.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}

        # Unfiltered listing: total number of flagged replicas.
        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        tot_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)

        # Bad ('B') replicas only.
        data = dumps({'state': 'B'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)

        # Suspicious ('S') replicas only.
        data = dumps({'state': 'S', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        tot_suspicious_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)

        # Temporary unavailable ('T') replicas only.
        data = dumps({'state': 'T', 'list_pfns': 'True'})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        tot_temporary_unavailable_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_temporary_unavailable_files.append(dumps(line))
        nb_tot_temporary_unavailable_files = len(tot_temporary_unavailable_files)

        # The three filtered counts must add up to the unfiltered total.
        assert_equal(nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files + nb_tot_temporary_unavailable_files)

        # Nothing can have been declared bad later than tomorrow.
        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        tot_bad_files = []
        for line in result.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)

        # The per-RSE summary must agree with the per-replica bad count.
        data = dumps({})
        result = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True)
        assert_equal(result.status, 200)
        nb_tot_bad_files2 = 0
        for line in result.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line.get('BAD', 0))
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                   'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)

        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                   'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)

        replicas = [r for r in self.replica_client.list_replicas(
            dids=[{'scope': i['scope'], 'name': i['name']} for i in files1])]
        assert_equal(len(replicas), len(files1))

        replicas = [r for r in self.replica_client.list_replicas(
            dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['file'])]
        assert_equal(len(replicas), 5)

        replicas = [r for r in self.replica_client.list_replicas(
            dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['srm'])]
        assert_equal(len(replicas), 5)

        # Replicas added in state 'U' (unavailable) must list with no RSE by default.
        files3 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                   'adler32': '0cc737eb', 'state': 'U', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [r for r in self.replica_client.list_replicas(
            dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'])]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})

        # Flip them to 'A' (available); they must then be visible on MOCK3.
        files4 = []
        for f in files3:  # renamed from 'file' to avoid shadowing
            f['state'] = 'A'
            files4.append(f)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [r for r in self.replica_client.list_replicas(
            dids=[{'scope': i['scope'], 'name': i['name']} for i in files3],
            schemes=['file'], unavailable=True)]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        # A plain account may not delete replicas directly.
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)
        # replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files])]
        # assert_equal(len(replicas), 0)

    def test_add_temporary_unavailable_pfns(self):
        """ REPLICA (CLIENT): Add temporary unavailable PFNs"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)

        # Listing replicas on deterministic RSE
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files],
                                                         schemes=['srm'], unavailable=True):
            # py3 fix: dict views are not indexable, use next(iter(...))
            pfn = next(iter(replica['pfns']))
            list_rep.append(pfn)

        # Submit bad PFNs
        now = datetime.utcnow()
        reason_str = generate_uuid()
        self.replica_client.add_bad_pfns(pfns=list_rep, reason=str(reason_str),
                                         state='TEMPORARY_UNAVAILABLE', expires_at=now.isoformat())
        result = get_bad_pfns(limit=10000, thread=None, total_threads=None, session=None)
        bad_pfns = {}
        for res in result:
            bad_pfns[res['pfn']] = (res['state'], res['reason'], res['expires_at'])
        for pfn in list_rep:
            pfn = str(clean_surls([pfn])[0])
            assert_in(pfn, bad_pfns)
            assert_equal(str(bad_pfns[pfn][0]), 'TEMPORARY_UNAVAILABLE')
            assert_equal(bad_pfns[pfn][1], reason_str)

        # Submit with wrong state
        with assert_raises(RucioException):
            self.replica_client.add_bad_pfns(pfns=list_rep, reason=str(reason_str),
                                             state='BADSTATE', expires_at=now.isoformat())

        # Run minos once: the submitted PFNs must be consumed from the queue.
        minos_run(threads=1, bulk=10000, once=True)
        result = get_bad_pfns(limit=10000, thread=None, total_threads=None, session=None)
        pfns = [res['pfn'] for res in result]
        res_pfns = []
        for replica in list_rep:
            if replica in pfns:
                res_pfns.append(replica)
        assert_equal(res_pfns, [])

        # Check the state in the replica table
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(next(iter(rep))), 'TEMPORARY_UNAVAILABLE')  # py3 fix

        rep = []
        for did in files:
            did['state'] = ReplicaState.from_sym('TEMPORARY_UNAVAILABLE')
            rep.append(did)

        # Run the minos expiration; replicas must come back AVAILABLE.
        minos_temp_run(threads=1, once=True)
        for did in files:
            rep = get_replicas_state(scope=did['scope'], name=did['name'])
            assert_equal(str(next(iter(rep))), 'AVAILABLE')  # py3 fix

    def test_set_tombstone(self):
        """ REPLICA (CLIENT): set tombstone on replica """
        # Set tombstone on one replica
        rse = 'MOCK4'
        scope = 'mock'
        user = '******'
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        assert_equal(get_replica(rse, scope, name)['tombstone'], None)
        self.replica_client.set_tombstone([{'rse': rse, 'scope': scope, 'name': name}])
        assert_equal(get_replica(rse, scope, name)['tombstone'], OBSOLETE)

        # Set tombstone on locked replica: must be refused.
        name = generate_uuid()
        add_replica(rse, scope, name, 4, user)
        RuleClient().add_replication_rule([{'name': name, 'scope': scope}], 1, rse, locked=True)
        with assert_raises(ReplicaIsLocked):
            self.replica_client.set_tombstone([{'rse': rse, 'scope': scope, 'name': name}])

        # Set tombstone on not found replica
        name = generate_uuid()
        with assert_raises(ReplicaNotFound):
            self.replica_client.set_tombstone([{'rse': rse, 'scope': scope, 'name': name}])
def test_replica_no_site(self): """ REPLICA (CORE): Test listing replicas without site attribute """ rc = ReplicaClient() rse = 'APERTURE_%s' % rse_name_generator() add_rse(rse) add_protocol( rse, { 'scheme': 'root', 'hostname': 'root.aperture.com', 'port': 1409, 'prefix': '//test/chamber/', 'impl': 'rucio.rse.protocols.xrootd.Default', 'domains': { 'lan': { 'read': 1, 'write': 1, 'delete': 1 }, 'wan': { 'read': 1, 'write': 1, 'delete': 1 } } }) add_rse_attribute(rse=rse, key='site', value='APERTURE') files = [{ 'scope': 'mock', 'name': 'element_%s' % generate_uuid(), 'bytes': 1234, 'adler32': 'deadbeef' }] add_replicas(rse=rse, files=files, account='root') replicas = [ r for r in rc.list_replicas(dids=[{ 'scope': 'mock', 'name': f['name'] } for f in files]) ] assert_in('root://', replicas[0]['pfns'].keys()[0]) replicas = [ r for r in rc.list_replicas(dids=[{ 'scope': 'mock', 'name': f['name'] } for f in files], client_location={'site': 'SOMEWHERE'}) ] assert_in('root://', replicas[0]['pfns'].keys()[0]) del_rse_attribute(rse=rse, key='site') replicas = [ r for r in rc.list_replicas(dids=[{ 'scope': 'mock', 'name': f['name'] } for f in files]) ] assert_in('root://', replicas[0]['pfns'].keys()[0]) replicas = [ r for r in rc.list_replicas(dids=[{ 'scope': 'mock', 'name': f['name'] } for f in files], client_location={'site': 'SOMEWHERE'}) ] assert_in('root://', replicas[0]['pfns'].keys()[0])
class TestReplicaMetalink:
    """Tests for metalink replica listings and PFN -> DID resolution."""

    def setup(self):
        # One file attached to a fresh dataset, replicated on three RSEs.
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.base_client = BaseClient(account='root',
                                      ca_cert=config_get('client', 'ca_cert'),
                                      auth_type='x509')
        self.token = self.base_client.headers['X-Rucio-Auth-Token']
        self.fname = generate_uuid()
        rses = ['MOCK', 'MOCK3', 'MOCK4']
        dsn = generate_uuid()
        self.files = [{'scope': 'mock', 'name': self.fname, 'bytes': 1, 'adler32': '0cc737eb'}]
        self.did_client.add_dataset(scope='mock', name=dsn)
        self.did_client.add_files_to_dataset('mock', name=dsn, files=self.files, rse='MOCK')
        for r in rses:
            self.replica_client.add_replicas(r, self.files)

    def test_list_replicas_metalink_4(self):
        """ REPLICA (METALINK): List replicas as metalink version 4 """
        ml = xmltodict.parse(self.replica_client.list_replicas(self.files,
                                                               metalink=4,
                                                               unavailable=True,
                                                               schemes=['https', 'sftp', 'file']),
                             xml_attribs=False)
        # One <url> per RSE the file was replicated to.
        assert_equal(3, len(ml['metalink']['file']['url']))

    def test_get_did_from_pfns_nondeterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for non-deterministic sites"""
        rse = 'MOCK2'
        tmp_scope = 'mock'
        nbfiles = 3
        pfns = []
        # builtin-shadowing fix: was named 'input'
        expected = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], False)
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()),
                  'meta': {'events': 10}} for _ in range(nbfiles)]
        for f in files:
            expected[f['pfn']] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse, files=files, account='root', ignore_availability=True)
        for replica in list_replicas(dids=[{'scope': f['scope'], 'name': f['name'], 'type': DIDType.FILE} for f in files],
                                     schemes=['srm'], ignore_availability=True):
            # shadowing fix: the loop variable previously reused (and clobbered) 'rse'
            for rse_name in replica['rses']:
                pfns.extend(replica['rses'][rse_name])
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            # py3 fix: dict views are not indexable
            pfn, did = next(iter(result.items()))
            assert_equal(expected[pfn], did)

    def test_get_did_from_pfns_deterministic(self):
        """ REPLICA (CLIENT): Get list of DIDs associated to PFNs for deterministic sites"""
        tmp_scope = 'mock'
        rse = 'MOCK3'
        nbfiles = 3
        pfns = []
        # builtin-shadowing fix: was named 'input'
        expected = {}
        rse_info = rsemgr.get_rse_info(rse)
        assert_equal(rse_info['deterministic'], True)
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1,
                  'adler32': '0cc737eb', 'meta': {'events': 10}} for _ in range(nbfiles)]
        p = rsemgr.create_protocol(rse_info, 'read', scheme='srm')
        for f in files:
            # py3 fix: dict_values is not indexable
            pfn = next(iter(p.lfns2pfns(lfns={'scope': f['scope'], 'name': f['name']}).values()))
            pfns.append(pfn)
            expected[pfn] = {'scope': f['scope'], 'name': f['name']}
        add_replicas(rse=rse, files=files, account='root', ignore_availability=True)
        for result in self.replica_client.get_did_from_pfns(pfns, rse):
            # py3 fix: dict views are not indexable
            pfn, did = next(iter(result.items()))
            assert_equal(expected[pfn], did)
class TestReplicaSorting(unittest.TestCase):
    # Core tests for the ordering of replica PFNs across LAN and WAN domains,
    # driven by the per-protocol read priorities configured on two temporary RSEs.

    def setUp(self):
        # Multi-VO deployments need the VO forwarded to every core call.
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            self.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
        else:
            self.vo = {}

    def test_replica_sorting(self):
        """ REPLICA (CORE): Test the correct sorting of the replicas across WAN and LAN """
        self.rc = ReplicaClient()
        self.rse1 = 'APERTURE_%s' % rse_name_generator()
        self.rse2 = 'BLACKMESA_%s' % rse_name_generator()
        self.rse1_id = add_rse(self.rse1, **self.vo)
        self.rse2_id = add_rse(self.rse2, **self.vo)
        # The 'site' attribute is what maps a client_location to LAN access.
        add_rse_attribute(rse_id=self.rse1_id, key='site', value='APERTURE')
        add_rse_attribute(rse_id=self.rse2_id, key='site', value='BLACKMESA')

        self.files = [{'scope': InternalScope('mock', **self.vo), 'name': 'element_0',
                       'bytes': 1234, 'adler32': 'deadbeef'}]
        root = InternalAccount('root', **self.vo)
        add_replicas(rse_id=self.rse1_id, files=self.files, account=root)
        add_replicas(rse_id=self.rse2_id, files=self.files, account=root)

        # RSE1 (APERTURE): root > davs on both domains; gsiftp is WAN-only (lan priority 0).
        add_protocol(self.rse1_id, {'scheme': 'root',
                                    'hostname': 'root.aperture.com',
                                    'port': 1409,
                                    'prefix': '//test/chamber/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1}}})
        add_protocol(self.rse1_id, {'scheme': 'davs',
                                    'hostname': 'davs.aperture.com',
                                    'port': 443,
                                    'prefix': '/test/chamber/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 2, 'write': 2, 'delete': 2},
                                        'wan': {'read': 2, 'write': 2, 'delete': 2}}})
        add_protocol(self.rse1_id, {'scheme': 'gsiftp',
                                    'hostname': 'gsiftp.aperture.com',
                                    'port': 8446,
                                    'prefix': '/test/chamber/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 0, 'write': 0, 'delete': 0},
                                        'wan': {'read': 3, 'write': 3, 'delete': 3}}})
        # RSE2 (BLACKMESA): gsiftp best on WAN; davs WAN-only; root last on WAN.
        add_protocol(self.rse2_id, {'scheme': 'gsiftp',
                                    'hostname': 'gsiftp.blackmesa.com',
                                    'port': 8446,
                                    'prefix': '/lambda/complex/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 2, 'write': 2, 'delete': 2},
                                        'wan': {'read': 1, 'write': 1, 'delete': 1}}})
        add_protocol(self.rse2_id, {'scheme': 'davs',
                                    'hostname': 'davs.blackmesa.com',
                                    'port': 443,
                                    'prefix': '/lambda/complex/',
                                    'impl': 'rucio.rse.protocols.gfal.Default',
                                    'domains': {
                                        'lan': {'read': 0, 'write': 0, 'delete': 0},
                                        'wan': {'read': 2, 'write': 2, 'delete': 2}}})
        add_protocol(self.rse2_id, {'scheme': 'root',
                                    'hostname': 'root.blackmesa.com',
                                    'port': 1409,
                                    'prefix': '//lambda/complex/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 3, 'write': 3, 'delete': 3}}})

        # Client at APERTURE: LAN protocols of RSE1 first, then RSE2 over WAN.
        # gsiftp.aperture is excluded (lan read priority 0 = disabled), hence 5 PFNs.
        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'APERTURE'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 5
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 1
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 2
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 3
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] == 4
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 5

        # Client at BLACKMESA: mirror image — RSE2 LAN first (davs.blackmesa
        # disabled on LAN), then RSE1 over WAN.
        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'BLACKMESA'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 5
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] == 1
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'lan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] == 2
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] == 3
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] == 4
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] == 5

        # Client at an unknown site (XEN): everything is WAN; only the relative
        # ordering within each priority tier is deterministic, hence the 'in' checks.
        replicas = [r for r in self.rc.list_replicas(dids=[{'scope': 'mock',
                                                            'name': f['name'],
                                                            'type': 'FILE'} for f in self.files],
                                                     schemes=['root', 'gsiftp', 'davs'],
                                                     client_location={'site': 'XEN'})]
        pfns = [r['pfns'] for r in replicas][0]
        assert len(pfns.keys()) == 6
        # TODO: intractable until RSE sorting is enabled
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0']['priority'] in [1, 2]
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0']['priority'] in [1, 2]
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0']['priority'] in [3, 4]
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0']['priority'] in [3, 4]
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0']['priority'] in [5, 6]
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['domain'] == 'wan'
        assert pfns['root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0']['priority'] in [5, 6]

        # The metalink rendering must follow the same ordering.
        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'APERTURE'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'BLACKMESA'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        # TODO: intractable until RSE sorting is enabled
        # ml = self.rc.list_replicas(dids=[{'scope': 'mock',
        #                                   'name': f['name'],
        #                                   'type': 'FILE'} for f in self.files],
        #                            schemes=['root', 'gsiftp', 'davs'],
        #                            metalink=True,
        #                            client_location={'site': 'XEN'})
        # assert 'domain="wan" priority="1">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="2">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="3">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="4">davs://davs.blackmesa.com:443/lambda/complex/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="5">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        # assert 'domain="wan" priority="6">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        # assert 'priority="7"' not in ml

        # ensure correct handling of disabled protocols
        # (root2.aperture has wan read priority 0, so it must never appear)
        add_protocol(self.rse1_id, {'scheme': 'root',
                                    'hostname': 'root2.aperture.com',
                                    'port': 1409,
                                    'prefix': '//test/chamber/',
                                    'impl': 'rucio.rse.protocols.xrootd.Default',
                                    'domains': {
                                        'lan': {'read': 1, 'write': 1, 'delete': 1},
                                        'wan': {'read': 0, 'write': 0, 'delete': 0}}})

        ml = self.rc.list_replicas(dids=[{'scope': 'mock',
                                          'name': f['name'],
                                          'type': 'FILE'} for f in self.files],
                                   schemes=['root', 'gsiftp', 'davs'],
                                   metalink=True,
                                   client_location={'site': 'BLACKMESA'})
        assert 'domain="lan" priority="1" client_extract="false">root://root.blackmesa.com:1409//lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="lan" priority="2" client_extract="false">gsiftp://gsiftp.blackmesa.com:8446/lambda/complex/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="3" client_extract="false">root://root.aperture.com:1409//test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="4" client_extract="false">davs://davs.aperture.com:443/test/chamber/mock/58/b5/element_0' in ml
        assert 'domain="wan" priority="5" client_extract="false">gsiftp://gsiftp.aperture.com:8446/test/chamber/mock/58/b5/element_0' in ml
        assert 'priority="6"' not in ml

        # Clean up the temporary replicas and RSEs.
        delete_replicas(rse_id=self.rse1_id, files=self.files)
        delete_replicas(rse_id=self.rse2_id, files=self.files)
        del_rse(self.rse1_id)
        del_rse(self.rse2_id)
class TestROOTProxy(unittest.TestCase):
    """Integration tests for internal root-proxy handling.

    An internal proxy ('proxy.aperture.com:1094') is configured for the
    APERTURE1 site.  The tests verify — through the client API and the
    metalink redirect endpoint — that the proxy is prepended to WAN PFNs
    only when the *client* sits at the site with the proxy, regardless of
    where the replica lives.
    """

    @classmethod
    def setUpClass(cls):
        # Multi-VO deployments need the VO kwarg and the X-Rucio-VO header
        # on every call; single-VO deployments use empty placeholders.
        if config_get_bool('common', 'multi_vo', raise_exception=False, default=False):
            cls.vo = {'vo': config_get('client', 'vo', raise_exception=False, default='tst')}
            cls.vo_header = {'X-Rucio-VO': cls.vo['vo']}
        else:
            cls.vo = {}
            cls.vo_header = {}

        cls.rc = ReplicaClient()

        # Client at BLACKMESA1: site with no outgoing proxy.
        cls.client_location_without_proxy = {'ip': '192.168.0.1',
                                             'fqdn': 'anomalous-materials.blackmesa.com',
                                             'site': 'BLACKMESA1'}
        cls.rse_without_proxy = rse_name_generator()
        cls.rse_without_proxy_id = add_rse(cls.rse_without_proxy, **cls.vo)
        add_rse_attribute(rse_id=cls.rse_without_proxy_id,
                          key='site',
                          value='BLACKMESA1')

        # Client at APERTURE1: site with an internal proxy.
        cls.client_location_with_proxy = {'ip': '10.0.1.1',
                                          'fqdn': 'test-chamber.aperture.com',
                                          'site': 'APERTURE1'}
        cls.rse_with_proxy = rse_name_generator()
        cls.rse_with_proxy_id = add_rse(cls.rse_with_proxy, **cls.vo)
        add_rse_attribute(rse_id=cls.rse_with_proxy_id,
                          key='site',
                          value='APERTURE1')

        # APERTURE1 site has an internal proxy
        config_set('root-proxy-internal', 'APERTURE1', 'proxy.aperture.com:1094')

        cls.files = [{
            'scope': InternalScope('mock', **cls.vo),
            'name': 'half-life_%s' % i,
            'bytes': 1234,
            'adler32': 'deadbeef',
            'meta': {
                'events': 666
            }
        } for i in range(1, 4)]
        for rse_id in [cls.rse_with_proxy_id, cls.rse_without_proxy_id]:
            add_replicas(rse_id=rse_id,
                         files=cls.files,
                         account=InternalAccount('root', **cls.vo),
                         ignore_availability=True)

        add_protocol(
            cls.rse_without_proxy_id, {
                'scheme': 'root',
                'hostname': 'root.blackmesa.com',
                'port': 1409,
                'prefix': '//training/facility/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        add_protocol(
            cls.rse_with_proxy_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

    @classmethod
    def tearDownClass(cls):
        for rse_id in [cls.rse_with_proxy_id, cls.rse_without_proxy_id]:
            delete_replicas(rse_id=rse_id, files=cls.files)
        del_rse(cls.rse_with_proxy_id)
        del_rse(cls.rse_without_proxy_id)

    def test_client_list_replicas1(self):
        """ ROOT (CLIENT): No proxy involved """
        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_without_proxy,
                client_location=self.client_location_without_proxy)
        ]

        expected_pfns = [
            'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1',
            'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2',
            'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas2(self):
        """ ROOT (CLIENT): Outgoing proxy needs to be prepended"""
        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_without_proxy,
                client_location=self.client_location_with_proxy)
        ]

        expected_pfns = [
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1',
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2',
            'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas3(self):
        """ ROOT (CLIENT): Outgoing proxy at destination does not matter"""
        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_with_proxy,
                client_location=self.client_location_without_proxy)
        ]

        expected_pfns = [
            'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1',
            'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2',
            'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_client_list_replicas4(self):
        """ ROOT (CLIENT): Outgoing proxy does not matter when staying at site"""
        replicas = [
            r for r in self.rc.list_replicas(
                dids=[{
                    'scope': 'mock',
                    'name': f['name'],
                    'type': 'FILE'
                } for f in self.files],
                rse_expression=self.rse_with_proxy,
                client_location=self.client_location_with_proxy)
        ]

        expected_pfns = [
            'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1',
            'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2',
            'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3'
        ]
        found_pfns = [list(replica['pfns'].keys())[0] for replica in replicas]
        assert sorted(found_pfns) == sorted(expected_pfns)

    def test_redirect_metalink_list_replicas(self):
        """ ROOT (REDIRECT REST): Test internal proxy prepend with metalink"""
        mw = []

        # default behaviour - no location -> no proxy
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body
        assert 'proxy' not in body

        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body
        assert 'proxy' not in body

        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink',
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
        assert 'proxy' not in body

        # site without proxy
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink?%s' % urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body
        assert 'proxy' not in body

        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink?%s' % urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body
        assert 'proxy' not in body

        # BUGFIX: these three assertions previously tested membership in the
        # TestApp response object ('in res') instead of the decoded body,
        # unlike every other assertion in this method.
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink?%s' % urlencode(self.client_location_without_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
        assert 'proxy' not in body

        # at location with outgoing proxy, prepend for wan replica
        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_1/metalink?%s' % urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c9/df/half-life_1' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c9/df/half-life_1' in body

        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_2/metalink?%s' % urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/c1/8d/half-life_2' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/c1/8d/half-life_2' in body

        res = TestApp(redirect_app.wsgifunc(*mw)).get(
            '/mock/half-life_3/metalink?%s' % urlencode(self.client_location_with_proxy),
            headers=self.vo_header,
            expect_errors=True)
        body = res.body.decode()
        assert 'root://proxy.aperture.com:1094//root://root.blackmesa.com:1409//training/facility/mock/16/30/half-life_3' in body
        assert 'root://root.aperture.com:1409//test/chamber/mock/16/30/half-life_3' in body
class TestArchive(object):
    """Tests for registering files inside zip archives and listing them,
    both directly and transparently through ``list_replicas``."""

    def __init__(self):
        self.dc = DIDClient()
        self.rc = ReplicaClient()

    def test_add_and_list_archive(self):
        """ ARCHIVE (CLIENT): Add files to archive and list the content """
        scope, rse = 'mock', 'MOCK'
        archive_files = ['file_' + generate_uuid() + '.zip' for _ in range(2)]
        files = []
        for i in range(10):
            files.append({
                'scope': scope,
                'name': 'lfn.%s' % str(generate_uuid()),
                'bytes': 724963570,
                'adler32': '0cc737eb',
                'type': 'FILE',
                'meta': {
                    'guid': str(generate_uuid())
                }
            })
        for archive_file in archive_files:
            # Register the archive as a replica, attach the files to it,
            # then check that all of them show up as archive content.
            self.rc.add_replicas(rse=rse,
                                 files=[{
                                     'scope': scope,
                                     'name': archive_file,
                                     'bytes': 1,
                                     'adler32': '0cc737eb'
                                 }])
            self.dc.add_files_to_archive(scope=scope,
                                         name=archive_file,
                                         files=files)
            content = [
                f for f in self.dc.list_archive_content(scope=scope,
                                                        name=archive_file)
            ]
            assert_equal(len(content), 10)

    def test_list_archive_contents_transparently(self):
        """ ARCHIVE (CORE): Transparent archive listing """
        scope = 'mock'
        rse = 'APERTURE_%s' % rse_name_generator()
        add_rse(rse)
        add_protocol(
            rse, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive = {
            'scope': scope,
            'name': 'weighted.storage.cube.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        add_replicas(rse=rse, files=[archive], account='root')

        # archived files with replicas
        # BUGFIX: 'xrange' does not exist in Python 3 -> use 'range'.
        files_with_replicas = [{
            'scope': scope,
            'name': 'witrep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        add_replicas(rse=rse, files=files_with_replicas, account='root')
        self.dc.add_files_to_archive(scope=scope,
                                     name=archive['name'],
                                     files=files_with_replicas)
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope,
                'name': f['name']
            } for f in files_with_replicas])
        ]
        assert_equal(len(res), 2)
        assert_equal(len(res[0]), 2)
        assert_equal(len(res[1]), 2)
        for r in res:
            for p in r:
                if r[p]['domain'] == 'zip':
                    assert_in('weighted.storage.cube.zip?xrdcl.unzip=witrep-', p)
                else:
                    assert_not_in(
                        'weighted.storage.cube.zip?xrdcl.unzip=witrep-', p)

        # archived files without replicas
        files = [{
            'scope': scope,
            'name': 'norep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope,
                                     name=archive['name'],
                                     files=files)
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope,
                'name': f['name']
            } for f in files])
        ]
        assert_equal(len(res), 2)
        for r in res:
            # BUGFIX: dict views are not subscriptable in Python 3;
            # 'r.keys()[0]' raised TypeError.
            assert_in('weighted.storage.cube.zip?xrdcl.unzip=norep-',
                      list(r.keys())[0])

        del_rse(rse)
class Fix():
    """Maintenance helpers for the admix upload pipeline.

    Operates on run documents in the runDB (MongoDB) and on Rucio rules/
    replicas to repair or reset stuck uploads.  DIDs are expected in the
    form ``xnt_<number>:<dtype>-<hash>``.
    """

    def __init__(self):

        # Take all data types categories
        self.RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['raw_records_tpc_types']
        self.RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['raw_records_mv_types']
        self.RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['raw_records_nv_types']
        self.LIGHT_RAW_RECORDS_TPC_TYPES = helper.get_hostconfig()['light_raw_records_tpc_types']
        self.LIGHT_RAW_RECORDS_MV_TYPES = helper.get_hostconfig()['light_raw_records_mv_types']
        self.LIGHT_RAW_RECORDS_NV_TYPES = helper.get_hostconfig()['light_raw_records_nv_types']
        self.HIGH_LEVEL_TYPES = helper.get_hostconfig()['high_level_types']
        self.RECORDS_TYPES = helper.get_hostconfig()['records_types']

        # Choose which data type you want to treat
        self.DTYPES = self.RAW_RECORDS_TPC_TYPES + self.RAW_RECORDS_MV_TYPES + self.RAW_RECORDS_NV_TYPES + self.LIGHT_RAW_RECORDS_TPC_TYPES + self.LIGHT_RAW_RECORDS_MV_TYPES + self.LIGHT_RAW_RECORDS_NV_TYPES + self.HIGH_LEVEL_TYPES + self.RECORDS_TYPES

        # Take the list of all XENON RSEs
        self.RSES = helper.get_hostconfig()['rses']

        # Take the RSE that is used to perform the upload
        self.UPLOAD_TO = helper.get_hostconfig()['upload_to']

        # Take the directory where datamanager has to upload data
        self.DATADIR = helper.get_hostconfig()['path_data_to_upload']

        # Get the sequence of rules to be created according to the data type
        self.RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["raw_records_tpc_rses"]
        self.RAW_RECORDS_MV_RSES = helper.get_hostconfig()["raw_records_mv_rses"]
        self.RAW_RECORDS_NV_RSES = helper.get_hostconfig()["raw_records_nv_rses"]
        self.LIGHT_RAW_RECORDS_TPC_RSES = helper.get_hostconfig()["light_raw_records_tpc_rses"]
        self.LIGHT_RAW_RECORDS_MV_RSES = helper.get_hostconfig()["light_raw_records_mv_rses"]
        self.LIGHT_RAW_RECORDS_NV_RSES = helper.get_hostconfig()["light_raw_records_nv_rses"]
        self.HIGH_LEVEL_RSES = helper.get_hostconfig()["high_level_rses"]
        self.RECORDS_RSES = helper.get_hostconfig()["records_rses"]

        # Init the runDB
        self.db = ConnectMongoDB()

        # Init Rucio for later uploads and handling:
        self.rc = RucioSummoner()

        # Init the Rucio replica client
        self.replicaclient = ReplicaClient()

        # Rucio Rule assignment priority
        self.priority = 3

        # BUGFIX: add_rule() reads this flag but it was never initialized,
        # which raised AttributeError on every call.  Default: do add rules.
        self.skip_rucio = False

    def reset_upload(self, did):
        """Reset a (possibly stuck) upload for *did*: delete local files and
        Rucio rules, wait for rule deletion, then mark the EB datum as
        'eb_ready_to_upload' so the upload can restart from scratch."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Resetting the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extracts the correct Event Builder machine who processed this run
        # Then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No reset is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Step zero (normally not needed): change the run status to "transferring"
        # self.db.db.find_one_and_update({'number':number},{'$set':{"status": "transferring"}})

        # First action: remove the files stored in datamanager
        files = list_file_replicas(number, dtype, hash, self.UPLOAD_TO)
        print("Deleting rucio data in datamanager disk. Deleting", len(files), "files")
        for file in files:
            try:
                os.remove(file)
            except OSError:
                print("File: {0} not found".format(file))

        # Second action: remove the LNGS Rucio rule
        deleted_any_rule = False
        for rse in self.RSES:
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)
            if rucio_rule['exists']:
                print("Deleting rucio rule = ", rucio_rule['id'], "from RSE = ", rse)
                self.rc.DeleteRule(rucio_rule['id'])
                deleted_any_rule = True

        # Third action: remove possible files in datamanager in case the Rucio rule does not exists
        datamanager_rucio_rule = self.rc.GetRule(upload_structure=did,
                                                 rse=self.UPLOAD_TO)
        if not datamanager_rucio_rule['exists']:
            print("Rucio rule not existing. Deleting data in datamanager without Rucio")
            filelistname = os.path.join("/archive/data/rucio/xnt_%06d/*/*/" % number,
                                        dtype + "-" + hash + "*")
            filelist = glob.glob(filelistname)
            for filePath in filelist:
                try:
                    os.remove(filePath)
                except OSError:
                    print("Error while deleting file : ", filePath)

        # If some rule has been deleted, wait for 1 hour (plus 5 minutes of margin)
        if deleted_any_rule:
            print("We have to wait until the rule is fully deleted before changing the status of the datum. It could take at least an hour")
            while True:
                datamanager_rucio_rule = self.rc.GetRule(upload_structure=did,
                                                         rse=self.UPLOAD_TO)
                if not datamanager_rucio_rule['exists']:
                    print("Rule for did {0} finally deleted".format(did))
                    break
                # Poll every ten minutes until Rucio reports the rule gone.
                delay = 60 * 10
                time.sleep(delay)
        else:
            print("There is no rule to delete")

        # Fourth action: set the EB status as 'eb_ready_to_upload'
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": 'eb_ready_to_upload'
            }})
        print("EB status changed to eb_ready_to_upload")

        # Reload the run
        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('New run status: {0}'.format(run['status']))
        else:
            print('Ru status: {0}'.format('Not available'))

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        # Prints the eb status as a confirmation of the performed change
        if ebstatus != "":
            print('New EB status: {0}'.format(ebstatus))
        else:
            print('New EB status: not available')

    def add_rule(self, did, from_rse, to_rse):
        """Create a Rucio transfer rule for *did* from *from_rse* to *to_rse*
        and register the corresponding datum in the runDB."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Adding a new rule {0} from {1} to {2}".format(did, from_rse, to_rse))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Checks if the datum of the sender exists in the DB
        datum = None
        for d in run['data']:
            if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d['location'] == from_rse:
                datum = d
                break
        if datum is None:
            print('The datum concerning data type {0} and site {1} is missing in the DB. Forced to stop'.format(dtype, from_rse))
            return (0)

        # Checks the rule status of the sender RSE
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=from_rse)
        if rucio_rule['state'] != 'OK' and rucio_rule['state'] != 'REPLICATING':
            print('The rule in {0} is neither OK nor REPLICATING. Forced to stop'.format(from_rse))
            return (0)

        # set the new rule
        if not self.skip_rucio:
            print("Adding the Rucio rule")
            self.rc.AddConditionalRule(did,
                                       from_rse,
                                       to_rse,
                                       lifetime=None,
                                       priority=self.priority)
        else:
            print("Rucio rule is not added")

        rucio_rule = self.rc.GetRule(did, rse=to_rse)

        # Update run status
        self.db.db.find_one_and_update({'number': number},
                                       {'$set': {
                                           'status': 'transferring'
                                       }})

        # Add a new datum in the run document
        updated_fields = {
            'host': "rucio-catalogue",
            'type': dtype,
            'location': to_rse,
            'lifetime': rucio_rule['expires'],
            'status': 'transferring',
            'did': did,
            'protocol': 'rucio'
        }
        data_dict = datum.copy()
        data_dict.update(updated_fields)
        self.db.AddDatafield(run['_id'], data_dict)

        print("Done.")

    def add_rules_from_file(self, filename, from_rse, to_rse):
        """Read DIDs (one per line, '#' comments allowed) from *filename* and
        sequentially create and wait for transfer rules from *from_rse* to
        *to_rse*.  Ctrl-C during the inter-DID pause stops the loop."""
        with open(filename) as f:
            dids = f.read().splitlines()

        for did in dids:
            # BUGFIX: guard against blank lines (did[0] raised IndexError).
            if not did or did[0] == "#":
                continue

            hash = did.split('-')[-1]
            dtype = did.split('-')[0].split(':')[-1]
            number = int(did.split(':')[0].split('_')[-1])
            timestamp = time.strftime("%Y-%m-%d-%H-%M-%S",
                                      time.localtime(time.time()))
            print("{0} - Adding a new rule {1} from {2} to {3}".format(timestamp, did, from_rse, to_rse))

            # Checks the rule status of the sender RSE
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=from_rse)
            if rucio_rule['state'] != 'OK' and rucio_rule['state'] != 'REPLICATING':
                print('The rule in {0} is neither OK nor REPLICATING. Skipping this DID'.format(from_rse))
                continue

            # Checks the rule status of the destination RSE
            rucio_rule = self.rc.GetRule(upload_structure=did, rse=to_rse)
            if rucio_rule['exists']:
                print('The rule in {0} already exists and its status is {1}. Skipping this DID'.format(to_rse, rucio_rule['state']))
                continue

            # Creates the new rule
            print("Adding the Rucio rule")
            self.rc.AddConditionalRule(did,
                                       from_rse,
                                       to_rse,
                                       lifetime=None,
                                       priority=5)

            # Waits until Rucio sees this rule as successfully transferred
            print("Waiting until the transfer is completed")
            rule_is_ok = False
            while not rule_is_ok:
                delay = 10  #60
                time.sleep(delay)
                rucio_rule = self.rc.GetRule(did, rse=to_rse)
                if rucio_rule['state'] == 'OK':
                    rule_is_ok = True

            print("Transfer completed")

            wait_time = 10
            print('Waiting for {0} seconds'.format(wait_time))
            print("You can safely CTRL-C now if you need to stop me")
            try:
                time.sleep(wait_time)
            except KeyboardInterrupt:
                break

    def delete_rule(self, did, rse):
        """Delete the runDB datum and the Rucio rule of *did* on *rse*;
        if *rse* is the upload RSE, also remove the local files."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Deleting the rule {0} from {1}".format(did, rse))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Checks if the datum exists in the DB
        datum = None
        for d in run['data']:
            if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d['location'] == rse:
                datum = d
                break

        # Delete the datum
        if datum is not None:
            self.db.RemoveDatafield(run['_id'], datum)
            print("Datum deleted in DB.")
        else:
            print('There is no datum to delete')

        # Get the rule of a given DID
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)

        # Delete the rule
        if rucio_rule['exists']:
            self.rc.DeleteRule(rucio_rule['id'])
            print("Rucio rule deleted.")
        else:
            print('There is no Rucio rule to delete')

        # In case it is datamanager, directly delete files
        if rse == self.UPLOAD_TO:
            files = list_file_replicas(number, dtype, hash, self.UPLOAD_TO)
            print("Deleting rucio data in datamanager disk. Deleting", len(files), "files")
            for file in files:
                try:
                    os.remove(file)
                except OSError:
                    print("File: {0} not found".format(file))

        print("Done.")

    def create_upload_rules(self, did):
        """Chain the configured per-dtype RSE sequence: for each consecutive
        (from, to) pair, add a rule if the destination does not have one."""
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
        dtype = did.split('-')[0].split(':')[-1]

        # Fourth action: creating the rules abroad
        if rucio_rule['exists'] and rucio_rule['state'] == "OK":
            print("Adding the Rucio rules abroad...")

            rses = [self.UPLOAD_TO]

            if dtype in self.RAW_RECORDS_TPC_TYPES:
                rses = rses + self.RAW_RECORDS_TPC_RSES
            if dtype in self.RAW_RECORDS_MV_TYPES:
                rses = rses + self.RAW_RECORDS_MV_RSES
            if dtype in self.RAW_RECORDS_NV_TYPES:
                rses = rses + self.RAW_RECORDS_NV_RSES

            if dtype in self.LIGHT_RAW_RECORDS_TPC_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_TPC_RSES
            if dtype in self.LIGHT_RAW_RECORDS_MV_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_MV_RSES
            if dtype in self.LIGHT_RAW_RECORDS_NV_TYPES:
                rses = rses + self.LIGHT_RAW_RECORDS_NV_RSES

            if dtype in self.HIGH_LEVEL_TYPES:
                rses = rses + self.HIGH_LEVEL_RSES

            if dtype in self.RECORDS_TYPES:
                rses = rses + self.RECORDS_RSES

            for from_rse, to_rse in zip(rses, rses[1:]):
                to_rule = self.rc.GetRule(upload_structure=did, rse=to_rse)
                if not to_rule['exists']:
                    print("Rule from {0} to {1}".format(from_rse, to_rse))
                    self.add_rule(did, from_rse, to_rse)

    def fix_upload(self, did):
        """Repair a partially-completed upload: wipe local leftovers, resume
        the upload, reconcile the runDB datum, (re)create the upload rule if
        missing, then create the downstream rules."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Fixing the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extracts the correct Event Builder machine who processed this run
        # Then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)
        print("EB: {0}".format(eb))

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No fix is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Get the expected number of files
        Nfiles = -1
        if 'file_count' in datum:
            Nfiles = datum['file_count']

        # First action: remove files in datamanager no matter if they were already uploaded or not
        print("Removing all files so far uploaded (successfully or not) in datamanager...")
        filelistname = os.path.join("/archive/data/rucio/xnt_%06d/*/*/" % number,
                                    dtype + "-" + hash + "*")
        filelist = glob.glob(filelistname)
        for filePath in filelist:
            try:
                os.remove(filePath)
            except OSError:
                print("Error while deleting file : ", filePath)

        # Second action: complete the missing uploads on the existing rule
        print('Resuming the upload...')
        file = datum['location'].split('/')[-1]
        upload_path = os.path.join(self.DATADIR, eb, file)
        self.rc.UploadToDid(did, upload_path, self.UPLOAD_TO)

        # Third action: check if datum in DB does not exist. If not, add it and mark the EB datum as transferred
        datum_upload = None
        for d in run['data']:
            if 'did' in d:
                if d['did'] == did and d['host'] == 'rucio-catalogue' and d['location'] == self.UPLOAD_TO:
                    datum_upload = d
                    break
        if datum_upload is None:
            print('The datum concerning data type {0} and site {1} is missing in the DB. It will be added'.format(did, self.UPLOAD_TO))

            # Update the eb data entry with status "transferred"
            self.db.db.find_one_and_update(
                {
                    '_id': run['_id'],
                    'data': {
                        '$elemMatch': {
                            'type': datum['type'],
                            'location': datum['location'],
                            'host': datum['host']
                        }
                    }
                }, {'$set': {
                    "data.$.status": "transferred"
                }})

            # Add a new data field with LNGS as RSE and with status "transferred"
            data_dict = datum.copy()
            data_dict.update({
                'host': "rucio-catalogue",
                'type': dtype,
                'location': "LNGS_USERDISK",
                'lifetime': 0,
                'status': 'transferred',
                'did': did,
                'protocol': 'rucio'
            })
            self.db.AddDatafield(run['_id'], data_dict)

        # Third action: in case the rule itself is missing, this would create it
        rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
        if not rucio_rule['exists']:
            print('Even if files have been uploaded, the rule has not been created yet. Creating it...')
            did_dictionary = [{
                'scope': did.split(':')[0],
                'name': did.split(':')[1]
            }]
            replicas = list(
                self.replicaclient.list_replicas(did_dictionary,
                                                 rse_expression=self.UPLOAD_TO))
            if len(replicas) != Nfiles:
                print('Error: the rule cannot be created beause the number of files uploaded ({0}) is different from the expected one ({1})'.format(len(replicas), Nfiles))
                return (0)
            # NOTE: a previous re-check of rucio_rule['exists'] here was
            # unreachable (we are inside 'if not rucio_rule["exists"]')
            # and has been removed.
            os.system('rucio add-rule {0} 1 {1}'.format(did, self.UPLOAD_TO))

        # Fourth action: creating the rules abroad
        self.create_upload_rules(did)

        return (0)

    def delete_db_datum(self, did, site):
        """Remove the runDB datum of *did* for *site* ('ebX' hosts match on
        the EB entry, otherwise on the rucio-catalogue entry)."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Removing the datum from DB for the DID: {0} and from the site {1}".format(did, site))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))
        print("Site: {0}".format(site))

        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        datum = None
        for d in run['data']:
            if 'eb' in site:
                if d['type'] == dtype and site in d['host'] and 'xenon.local' in d['host']:
                    datum = d
                    break
            else:
                if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d['location'] == site:
                    datum = d
                    break

        if datum is not None:
            self.db.RemoveDatafield(run['_id'], datum)
            print("Done.")
        else:
            print('There is no datum. Nothing has been deleted')

    def set_run_status(self, number, status):
        """Set the top-level 'status' field of run *number* to *status*."""
        number = int(number)
        print("Setting the status of run {0} to the value {1}".format(number, status))
        run = self.db.db.find_one({'number': number})
        print("status before = ", run['status'])
        self.db.db.find_one_and_update({'_id': run['_id']},
                                       {'$set': {
                                           "status": status
                                       }})
        run = self.db.db.find_one({'number': number})
        print("status after = ", run['status'])

    def set_eb_status(self, did, status):
        """Set the status of the EB datum matching *did* to *status*."""
        print("Setting the EB status of DID {0} to the value {1}".format(did, status))
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Extracts the correct Event Builder machine who processed this run
        # Then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum.')
            return (0)

        if ebstatus != "":
            print("EB status before = ", ebstatus)
        else:
            print("EB status absent before")

        # Set the aimed value
        # self.db.db.find_one_and_update({'_id': run['_id'],'data': {'$elemMatch': datum}},
        #                                {'$set': {'data.$.status': status}})
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": status
            }})

        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        print("EB status after = ", ebstatus)

    def list_non_transferred_runs(self):
        """Print runs stuck in 'transferring' whose LNGS datum matches the
        hard-coded dtype list, together with their EB status."""
        runs = self.db.db.find({'status': "transferring"}, {
            'number': 1,
            'data': 1
        })

        # dtypes = ["records","records_he", "records_nv", "records_mv"]
        # dtypes = ["records_nv"]
        dtypes = ["raw_records"]

        for run in runs:
            for d in run['data']:
                if d['type'] in dtypes and d['host'] == 'rucio-catalogue' and d['location'] == 'LNGS_USERDISK':
                    print(run['number'], d['did'], d['status'], " ", end='')
                    for deb in run['data']:
                        if deb['type'] == d['type'] and 'eb' in deb['host']:
                            print(deb['host'], deb['status'], end='')
                    print("")

    def test(self):
        """Ad-hoc query scratchpad: print transferred runs >= 31113 that
        contain an 'afterpulses' datum."""
        # runs = self.db.db.find({'number' : "transferring"},{'number' : 1, 'data' : 1})
        # self.db.db.find_one_and_update({'number': 23838, 'deleted_data.type' : 'raw_records' },
        #                                { '$set': { "deleted_data.$.file_count" : 44 } })
        runs = self.db.db.find(
            {
                'status': "transferred",
                'number': {
                    "$gte": 31113
                }
            }, {
                'number': 1,
                'status': 1,
                'data': 1
            })
        for run in runs:
            doit = False
            for d in run['data']:
                if d['type'] == 'afterpulses':
                    doit = True
            if doit:
                print(run['number'])
                # self.set_run_status(run['number'],'transferring' )

    def test_db_modification(self, did, new_status_name):
        """Diagnostic: write *new_status_name* into the EB datum of *did* and
        immediately read it back, to gauge DB update latency."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Testing how quickly a modification in DB is registered. Using DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extracts the correct Event Builder machine who processed this run
        # Then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No reset is possible')
            return (0)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        # Start the changes: set the EB status to the requested value
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": new_status_name
            }})
        print("EB status changed to {0}".format(new_status_name))

        # Reload the run
        run = self.db.db.find_one({'number': number})

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        # Prints the eb status as a confirmation of the performed change
        if ebstatus != "":
            print('New EB status: {0}'.format(ebstatus))
        else:
            print('New EB status: not available')

    def __del__(self):
        pass

    def fix_upload_db(self, did):
        """Reconcile the runDB only (no file/rule action): mark the EB datum
        of *did* as transferred and add the missing LNGS_USERDISK datum."""
        hash = did.split('-')[-1]
        dtype = did.split('-')[0].split(':')[-1]
        number = int(did.split(':')[0].split('_')[-1])
        print("Fixing the upload associated to the DID: {0}".format(did))
        print("Run number: {0}".format(number))
        print("Data type: {0}".format(dtype))
        print("Hash: {0}".format(hash))

        run = self.db.db.find_one({'number': number})

        # Gets the status
        if 'status' in run:
            print('Run status: {0}'.format(run['status']))
        else:
            print('Run status: {0}'.format('Not available'))

        # Extracts the correct Event Builder machine who processed this run
        # Then also the bootstrax state and, in case it was abandoned, the reason
        if 'bootstrax' in run:
            bootstrax = run['bootstrax']
            eb = bootstrax['host'].split('.')[0]
        else:
            print('Not processed')
            return (0)

        # Checks if the LNGS datum exists already in the DB
        for d in run['data']:
            if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d['location'] == "LNGS_USERDISK":
                print('The datum concerning did {0} for location {1} is already present in DB. Forced to stop'.format(did, "LNGS_USERDISK"))
                return (0)

        # Get the EB datum and its status
        ebstatus = ""
        datum = None
        for d in run['data']:
            if d['type'] == dtype and eb in d['host']:
                datum = d
                if 'status' in d:
                    ebstatus = d['status']

        if datum is None:
            print('There is no EB datum. No fix is possible')
            return (0)

        # Update the eb data entry with status "transferred"
        self.db.db.find_one_and_update(
            {
                '_id': run['_id'],
                'data': {
                    '$elemMatch': {
                        'type': datum['type'],
                        'location': datum['location'],
                        'host': datum['host']
                    }
                }
            }, {'$set': {
                "data.$.status": "transferred"
            }})

        # Add a new data field with LNGS as RSE and with status "trasferred"
        data_dict = datum.copy()
        data_dict.update({
            'host': "rucio-catalogue",
            'type': dtype,
            'location': "LNGS_USERDISK",
            'lifetime': 0,
            'status': 'transferred',
            'did': did,
            'protocol': 'rucio'
        })
        self.db.AddDatafield(run['_id'], data_dict)

        if ebstatus != "":
            print('EB status: {0}'.format(ebstatus))
        else:
            print('EB status: not available')

        print('Done')

    def postpone(self):
        """Move the current screen session's /tmp/admix-* dataset file into
        the 'datasets to fix' directory, timestamped, so it is retried later."""
        # Get the current screen session
        process = psutil.Process()
        screen = process.parent().parent().parent().parent().cmdline()[-1]

        # Take the tmp file of this session containing the dataset information
        filename = "/tmp/admix-" + screen

        # Destination name
        suffix = time.strftime("-%Y-%m-%d-%H-%M-%S",
                               time.localtime(time.time()))
        destination_path = helper.get_hostconfig()['path_datasets_to_fix'] + "/"
        new_filename = destination_path + filename.split('/')[-1] + suffix

        if os.path.isfile(filename) and os.path.isdir(destination_path):
            shutil.move(filename, new_filename)
            print("Dataset postponed by moving file {0} to {1}".format(filename, new_filename))
class RunSync(object):
    """
    Synchronize the replica of a given run at WIPAC-ORIG the corresponding Rucio site.
    """

    def __init__(self,
                 run,
                 originrse=DEFAULT_ORIGIN_RSE,
                 destrse=None,
                 scope=DEFAULT_SCOPE,
                 check=True,
                 lifetime=None,
                 dry_run=False,
                 container=None):
        """
        :param run: whole run path, e.g. IceCube/2016/filtered/level2pass2/0101/Run00127347
        :param originrse: RSE holding the data to be registered (default DEFAULT_ORIGIN_RSE)
        :param destrse: destination RSE for the replication rule (may be None)
        :param scope: Rucio scope used for all DIDs
        :param check: unused here beyond being stored -- TODO confirm callers
        :param lifetime: lifetime passed to dataset/container creation
        :param dry_run: if True, only print what would be done
        :param container: optional container name to attach the run dataset to
        """
        self.run = run
        self.originrse = originrse
        self.destrse = destrse
        self.scope = scope
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run
        self.container = container

        self.rucio_datasets = {}
        self.run_files = {}
        self.existent_replica_files = {}
        self.url = ''
        self.gfal = Gfal2Context()

        self.run_Number = None
        self.get_run_Number()
        self.files_storage = {}
        self.get_global_url()

        self.didc = DIDClient()
        self.repc = ReplicaClient()
        self.rulesClient = RuleClient()

        # Right now obtaining the Metadata from the storage at WIPAC
        # Hopefully in the future from JADE
        # TODO
        self.get_run_Files()
        self.get_rucio_metadata()
        self.update_run_Files()
        self.get_files_metadata()

    def update_run_Files(self):
        """
        Updating the run files wiht only the files that have not been registered
        """
        for f in self.existent_replica_files:
            # Replica names look like "<run>/<file>"; compare basenames
            file_name = f.split('/')[-1:][0]
            if file_name in self.run_files:
                print("File: %s already registered. Skipping it" % file_name)
                self.run_files.pop(file_name)

    def get_files_metadata(self):
        """
        Fill size/checksum metadata for every run file that has no replica yet.
        """
        for f in self.run_files:
            if self.run + '/' + f not in self.existent_replica_files:
                self.obtain_metadata(f)
        # NOTE(review): "Metadat" looks like a typo for "Metadata" in this message
        print("Metadat initialization done")

    def obtain_metadata(self, filename):
        """
        Get the size and checksum for every file in the run from the gftp server
        """
        url = self.get_file_url(filename)
        print("checking metadata for url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            adler32 = self.gfal.checksum(str(url), 'adler32')
            print(
                "got size and adler 32checksum of file: pfn=%s size=%s checksum=%s"
                % (url, size, adler32))
            self.run_files[filename] = {
                'size': size,
                'adler32': adler32,
                'name': self.run + '/' + filename
            }
        except GError:
            # File missing on storage: leave it out of run_files metadata
            print("no file found at %s" % url)
            return False

    def get_file_url(self, filename):
        # Full PFN of one file inside the run directory
        return self.url + '/' + self.run + '/' + filename

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.originrse)
        rse = rsemgr.get_rse_info(self.originrse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + self.scope.replace('.', '/')
        if schema == 'srm':
            # SRM needs the web service path prepended to the prefix
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def get_run_Number(self):
        """
        Obtain the run number out of whole run IceCube/2016/filtered/level2pass2/0101/Run00127347
        """
        print("Obtaining run number out of run(dataset): %s" % self.run)
        self.run_Number = self.run.split("/")[-1]
        print("Run number (dataset): %s" % self.run_Number)

    def get_run_Files(self):
        """
        Gets the list of files for a given run and their checksums from the storage
        """
        self.run_url = self.url + '/' + self.run
        print("Listin files from url : %s" % self.run_url)
        run_files = []
        try:
            run_files = self.gfal.listdir(str(self.run_url))
        except GError:
            print("No files found at %s" % str(self.run_url))
        print("Files found in storage:")
        count = 0
        for f in run_files:
            # Skip very short entries (e.g. "." / ".."); cap at 5000 files
            if len(f) > 3:
                if count < 5000:
                    self.run_files[f] = {}
                    count = count + 1
                else:
                    break

    def get_rucio_metadata(self):
        """
        Gets the list of datasets at the Rucio RSE, the files, and the metadata.
        """
        print(
            "Initializing Rucio... getting the list of blocks and files at %s"
            % self.originrse)
        registered_datasets = self.repc.list_datasets_per_rse(self.originrse)
        for dataset in registered_datasets:
            self.rucio_datasets[dataset] = {}

        replica_info = self.repc.list_replicas([{
            "scope": self.scope,
            "name": '/' + self.run_Number
        }],
                                               rse_expression="rse=%s" %
                                               self.originrse)
        replica_files = set()
        for file_info in replica_info:
            name = file_info['name']
            # Only count replicas actually present at the origin RSE
            if self.originrse in file_info['rses']:
                replica_files.add(name)
        self.existent_replica_files = replica_files
        print("Rucio initialization done.")

    def register(self):
        """
        Create the container, the datasets and attach them to the container.
        """
        print("Registering...")
        self.register_dataset(self.run_Number)
        self.register_replicas(self.run_files)
        self.register_container(self.container)
        self.attach_dataset_to_container(self.run_Number, self.container)
        self.add_replica_rule(dataset=self.run_Number, destRSE=self.destrse)

    def register_container(self, container):
        """
        Registering the container
        """
        print("Registering the container %s with scope: %s" %
              (container, self.scope))
        if container is None:
            print('No container added, not registering any container')
            return
        if self.dry_run:
            print('Dry run only, not registering the container')
            return
        try:
            self.didc.add_container(scope=self.scope,
                                    name=container,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print("Container %s already exists" % container)
        except InvalidObject:
            print("Problem with container name: %s" % container)

    def attach_dataset_to_container(self, dataset, container):
        """
        Attaching the dataset to a container
        """
        print("Attaching dataset %s, to container: %s" % (dataset, container))
        if container is None:
            print('No container added, not registering dataset in container')
            return
        if self.dry_run:
            print('Dry run only, not attaching dataset container')
            return
        try:
            self.didc.attach_dids(scope=self.scope,
                                  name=container,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': '/' + dataset
                                  }])
        except RucioException:
            # NOTE(review): this broad except treats any Rucio error as
            # "already attached" -- confirm this is intentional
            print("dataset already attached to container")
        return

    def register_dataset(self, run):
        """
        Registering a dataset in the rucio database
        """
        print("registering dataset %s" % run)
        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return
        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=run,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % run)

    def register_replicas(self, replicas):
        """
        Register file replica.

        :param replicas: dict of filename -> metadata (size/adler32/name)
        """
        if not replicas:
            return
        print("registering files in Rucio: %s" %
              ", ".join([replicas[filemd]['name'] for filemd in replicas]))
        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return
        try:
            self.repc.add_replicas(rse=self.originrse,
                                   files=[{
                                       'scope': self.scope,
                                       'name': replicas[filemd]['name'],
                                       'adler32': replicas[filemd]['adler32'],
                                       'bytes': replicas[filemd]['size'],
                                   } for filemd in replicas])
            print("Adding files to dataset: %s" % self.run_Number)
        except InvalidObject:
            print("Problem with file name does not match pattern")

        # Attach each registered file to the run dataset
        for filemd in replicas:
            try:
                self.didc.attach_dids(scope=self.scope,
                                      name=self.run_Number,
                                      dids=[{
                                          'scope': self.scope,
                                          'name': replicas[filemd]['name']
                                      }])
            except FileAlreadyExists:
                print("File already attached")

    def add_replica_rule(self, destRSE, dataset):
        """
        Create a replication rule for one dataset "Run" at an RSE
        """
        print("Creating replica rule for dataset %s at rse: %s" %
              (dataset, destRSE))
        if self.dry_run:
            print(' Dry run only. Not creating rules')
            return
        if destRSE:
            try:
                self.rulesClient.add_replication_rule([{
                    "scope": self.scope,
                    "name": "/" + dataset
                }],
                                                      copies=1,
                                                      rse_expression=destRSE)
            except DuplicateRule:
                print('Rule already exists')
####from client#### from rucio.client.replicaclient import ReplicaClient rep = ReplicaClient() #did = 'ams-user-chenghsi:Acceptance_Form.jpg'.split(':') did = 'ams-2011B-ISS.B620-pass4:1368923945.00000001.root' #did = 'ams-2011B-ISS.B620-pass4:2011-06-14' did_list = did.split(':') scope = did_list[0] filename = did_list[1] rse_name = 'TW-EOS01_AMS02DATADISK' adler32 = '' md5 = '' bytes = 0 #print 'before:' for x in rep.list_replicas([{'scope': scope, 'name': filename}]): adler32 = x['adler32'] md5 = x['md5'] bytes = x['bytes'] print adler32, md5, bytes #from rucio.client.didclient import DIDClient #did = DIDClient() #file_meta = did.get_metadata(scope, filename) #rep.delete_replicas(rse_name, [{'scope': scope, 'name': filename}]) #print 'after deletion:' #for x in rep.list_replicas([{'scope': scope, 'name': filename}]): # print x #rep.add_replica(rse_name, scope, filename, bytes, adler32, md5, file_meta) print 'test' #print 'after add:' #for x in rep.list_replicas([{'scope': scope, 'name': filename}]): # print x
# Script fragment: stat a file on storage via gfal2 and, if no replica of
# OPTIONS.scope:OPTIONS.name exists yet at OPTIONS.rse, prepare a replica
# description to register. (Fragment is truncated mid-list below.)
GFAL = Gfal2Context()

try:
    # Size and adler32 checksum straight from the storage endpoint
    SIZE = GFAL.stat(str(URL)).st_size
    CHECKSUM = GFAL.checksum(str(URL), 'adler32')
    print("Registering file: pfn=%s size=%s checksum=%s" %
          (URL, SIZE, CHECKSUM))
except GError:
    print("no file found at %s" % URL)
    exit()

R = ReplicaClient()
REPLICAS = list(
    R.list_replicas([{
        'scope': OPTIONS.scope,
        'name': OPTIONS.name
    }]))
if REPLICAS:
    # list_replicas yields one record per DID; inspect the first
    REPLICAS = REPLICAS[0]
    if 'rses' in REPLICAS:
        if OPTIONS.rse in REPLICAS['rses']:
            # Already registered at the target RSE: nothing to do
            print("file %s with scope %s has already a replica at %s" %
                  (OPTIONS.name, OPTIONS.scope, OPTIONS.rse))
            exit()

REPLICA = [{
    'scope': OPTIONS.scope,
    'name': OPTIONS.name,
    'adler32': CHECKSUM,
    'bytes': SIZE,
    'pfn': URL
class DatasetInjector(object):
    """
    General Class for injecting a cms dataset in rucio
    """

    def __init__(self,
                 dataset,
                 site,
                 rse=None,
                 scope=DEFAULT_SCOPE,
                 uuid=None,
                 check=True,
                 lifetime=None,
                 dry_run=False):
        """
        :param dataset: CMS dataset name to inject
        :param site: PhEDEx site name used for block/file lookup
        :param rse: target RSE; defaults to the site name if None
        :param scope: Rucio scope for all DIDs
        :param uuid: if set, only inject the block with this uuid suffix
        :param check: verify size/checksum on storage before registering
        :param lifetime: lifetime passed to container/dataset creation
        :param dry_run: if True, only print what would be done
        """
        self.dataset = dataset
        self.site = site
        if rse is None:
            rse = site
        self.rse = rse
        self.scope = scope
        self.uuid = uuid
        self.check = check
        self.lifetime = lifetime
        self.dry_run = dry_run

        self.blocks = []
        self.url = ''

        self.getmetadata()
        self.get_global_url()
        self.didc = DIDClient()
        self.repc = ReplicaClient()

        self.gfal = Gfal2Context()

    def get_file_url(self, lfn):
        """
        Return the rucio url of a file.
        """
        return self.url + '/' + lfn

    def get_global_url(self):
        """
        Return the base path of the rucio url
        """
        print("Getting parameters for rse %s" % self.rse)
        rse = rsemgr.get_rse_info(self.rse)
        proto = rse['protocols'][0]

        schema = proto['scheme']
        prefix = proto['prefix'] + '/' + self.scope.replace('.', '/')
        if schema == 'srm':
            # SRM needs the web service path prepended to the prefix
            prefix = proto['extended_attributes']['web_service_path'] + prefix
        url = schema + '://' + proto['hostname']
        if proto['port'] != 0:
            url = url + ':' + str(proto['port'])
        self.url = url + prefix
        print("Determined base url %s" % self.url)

    def getmetadata(self):
        """
        Gets the list of blocks at a site, their files and their metadata
        """
        print("Initializing... getting the list of blocks and files")
        blocks = das_go_client("block dataset=%s site=%s system=phedex" %
                               (self.dataset, self.site))
        for item in blocks:
            # Block names are "<dataset>#<uuid>"; filter on uuid if requested
            uuid = item['block'][0]['name'].split('#')[1]
            if (self.uuid is None) or (uuid == self.uuid):
                block = {'name': item['block'][0]['name'], 'files': []}
                files = das_go_client("file block=%s site=%s system=phedex" %
                                      (block['name'], self.site))
                for item2 in files:
                    # Extract the adler32 value from "adler32:<hex>,..."
                    cksum = re.match(r"adler32:([^,]+)",
                                     item2['file'][0]['checksum'])
                    cksum = cksum.group(0).split(':')[1]
                    # Zero-pad to 8 hex digits as Rucio expects
                    cksum = "{0:0{1}x}".format(int(cksum, 16), 8)
                    block['files'].append({
                        'name': item2['file'][0]['name'],
                        'checksum': cksum,
                        'size': item2['file'][0]['size']
                    })
                self.blocks.append(block)
        # NOTE(review): "Initalization" looks like a typo for "Initialization"
        print("Initalization done.")

    def register(self):
        """
        Create the container, the datasets and attach them to the container.
        """
        print("Registering...")
        self.register_container()
        for block in self.blocks:
            self.register_dataset(block['name'])
            for filemd in block['files']:
                self.register_replica(filemd)
                self.attach_file(filemd['name'], block['name'])
        print("All datasets, blocks and files registered")

    def register_container(self):
        """
        Create the container.
        """
        print("registering container %s" % self.dataset)
        if self.dry_run:
            print(' Dry run only. Not creating container.')
            return
        try:
            self.didc.add_container(scope=self.scope,
                                    name=self.dataset,
                                    lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Container %s already exists" % self.dataset)

    def register_dataset(self, block):
        """
        Create the dataset and attach them to teh container
        """
        print("registering dataset %s" % block)
        if self.dry_run:
            print(' Dry run only. Not creating dataset.')
            return
        try:
            self.didc.add_dataset(scope=self.scope,
                                  name=block,
                                  lifetime=self.lifetime)
        except DataIdentifierAlreadyExists:
            print(" Dataset %s already exists" % block)
        try:
            print("attaching dataset %s to container %s" %
                  (block, self.dataset))
            self.didc.attach_dids(scope=self.scope,
                                  name=self.dataset,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': block
                                  }])
        except RucioException:
            # NOTE(review): broad except treats any Rucio error as
            # "already attached" -- confirm this is intentional
            print(" Dataset already attached")

    def attach_file(self, lfn, block):
        """
        Attach the file to the container
        """
        if self.dry_run:
            print(' Dry run only. Not attaching files.')
            return
        try:
            print("attaching file %s" % lfn)
            self.didc.attach_dids(scope=self.scope,
                                  name=block,
                                  dids=[{
                                      'scope': self.scope,
                                      'name': lfn
                                  }])
        except FileAlreadyExists:
            print("File already attached")

    def register_replica(self, filemd):
        """
        Register file replica.
        """
        print("registering file %s" % filemd['name'])
        if self.dry_run:
            print(' Dry run only. Not registering files.')
            return
        if self.check:
            self.check_storage(filemd)
        if not self.check_replica(filemd['name']):
            self.repc.add_replicas(rse=self.rse,
                                   files=[{
                                       'scope': self.scope,
                                       'name': filemd['name'],
                                       'adler32': filemd['checksum'],
                                       'bytes': filemd['size'],
                                       'pfn':
                                       self.get_file_url(filemd['name'])
                                   }])

    def check_storage(self, filemd):
        """
        Check size and checksum of a file on storage
        """
        url = self.get_file_url(filemd['name'])
        print("checking url %s" % url)
        try:
            size = self.gfal.stat(str(url)).st_size
            checksum = self.gfal.checksum(str(url), 'adler32')
            print("got size and checksum of file: pfn=%s size=%s checksum=%s"
                  % (url, size, checksum))
        except GError:
            print("no file found at %s" % url)
            return False
        if str(size) != str(filemd['size']):
            print("wrong size for file %s. Expected %s got %s" %
                  (filemd['name'], filemd['size'], size))
            return False
        if str(checksum) != str(filemd['checksum']):
            # NOTE(review): "git" looks like a typo for "got" in this message
            print("wrong checksum for file %s. Expected %s git %s" %
                  (filemd['name'], filemd['checksum'], checksum))
            return False
        print("size and checksum are ok")
        return True

    def check_replica(self, lfn):
        """
        Check if a replica of the given file at the site already exists.
        """
        print("checking if file %s with scope %s has already a replica at %s"
              % (lfn, self.scope, self.rse))
        replicas = list(
            self.repc.list_replicas([{
                'scope': self.scope,
                'name': lfn
            }]))
        if replicas:
            # list_replicas yields one record per DID; inspect the first
            replicas = replicas[0]
            if 'rses' in replicas:
                if self.rse in replicas['rses']:
                    print(
                        "file %s with scope %s has already a replica at %s" %
                        (lfn, self.scope, self.rse))
                    return True
        print("no existing replicas")
        return False
class TestArchive(object):
    """Client/core tests for Rucio archive (zip) handling: registering
    archives, attaching files to them, and transparently resolving archive
    contents through ``list_replicas``."""

    def __init__(self):
        self.dc = DIDClient()
        self.rc = ReplicaClient()
        # In multi-VO deployments the test VO must be passed to core calls
        if config_get_bool('common',
                           'multi_vo',
                           raise_exception=False,
                           default=False):
            self.vo = {'vo': 'tst'}
        else:
            self.vo = {}

    def test_add_and_list_archive(self):
        """ ARCHIVE (CLIENT): Add files to archive and list the content """
        scope, rse = 'mock', 'MOCK'
        archive_files = ['file_' + generate_uuid() + '.zip' for _ in range(2)]
        files = []
        for i in range(10):
            files.append({
                'scope': scope,
                'name': 'lfn.%s' % str(generate_uuid()),
                'bytes': 724963570,
                'adler32': '0cc737eb',
                'type': 'FILE',
                'meta': {
                    'guid': str(generate_uuid())
                }
            })
        for archive_file in archive_files:
            # Register the archive itself as a replica, then attach the
            # same 10 files to it and verify the listed content
            self.rc.add_replicas(rse=rse,
                                 files=[{
                                     'scope': scope,
                                     'name': archive_file,
                                     'bytes': 1,
                                     'adler32': '0cc737eb'
                                 }])
            self.dc.add_files_to_archive(scope=scope,
                                         name=archive_file,
                                         files=files)

            content = [
                f for f in self.dc.list_archive_content(scope=scope,
                                                        name=archive_file)
            ]

            assert_equal(len(content), 10)

    def test_list_archive_contents_transparently(self):
        """ ARCHIVE (CORE): Transparent archive listing """

        scope = InternalScope('mock', **self.vo)
        rse = 'APERTURE_%s' % rse_name_generator()
        rse_id = add_rse(rse, **self.vo)
        root = InternalAccount('root', **self.vo)

        add_protocol(
            rse_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive = {
            'scope': scope,
            'name': 'weighted.storage.cube.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        archive_client = archive.copy()
        # Core calls use InternalScope; client calls need the external string
        archive_client['scope'] = archive_client['scope'].external

        add_replicas(rse_id=rse_id, files=[archive], account=root)

        # archived files with replicas
        files_with_replicas = [{
            'scope': scope,
            'name': 'witrep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        files_with_replicas_client = []
        for f in files_with_replicas:
            new_file = f.copy()
            new_file['scope'] = new_file['scope'].external
            files_with_replicas_client.append(new_file)

        add_replicas(rse_id=rse_id, files=files_with_replicas, account=root)
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive_client['name'],
                                     files=files_with_replicas_client)

        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope.external,
                'name': f['name']
            } for f in files_with_replicas_client],
                                                     resolve_archives=True)
        ]
        # Each file resolves to two PFNs: its own replica and the archive one
        assert_equal(len(res), 2)
        assert_equal(len(res[0]), 2)
        assert_equal(len(res[1]), 2)
        for r in res:
            for p in r:
                if r[p]['domain'] == 'zip':
                    assert_in('weighted.storage.cube.zip?xrdcl.unzip=witrep-',
                              p)
                else:
                    assert_not_in(
                        'weighted.storage.cube.zip?xrdcl.unzip=witrep-', p)

        # archived files without replicas
        files = [{
            'scope': scope.external,
            'name': 'norep-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 1234,
            'adler32': 'deadbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive_client['name'],
                                     files=files)
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': scope.external,
                'name': f['name']
            } for f in files],
                                                     resolve_archives=True)
        ]
        assert_equal(len(res), 2)
        for r in res:
            # FIX: dict views are not subscriptable on Python 3;
            # r.keys()[0] raised TypeError there. list() works on both 2 and 3.
            assert_in('weighted.storage.cube.zip?xrdcl.unzip=norep-',
                      list(r.keys())[0])

    def test_list_archive_contents_at_rse(self):
        """ ARCHIVE (CORE): Transparent archive listing at RSE """

        scope = InternalScope('mock', **self.vo)
        root = InternalAccount('root', **self.vo)

        rse1 = 'APERTURE_%s' % rse_name_generator()
        rse1_id = add_rse(rse1, **self.vo)
        add_protocol(
            rse1_id, {
                'scheme': 'root',
                'hostname': 'root.aperture.com',
                'port': 1409,
                'prefix': '//test/chamber/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        rse2 = 'BLACKMESA_%s' % rse_name_generator()
        rse2_id = add_rse(rse2, **self.vo)
        add_protocol(
            rse2_id, {
                'scheme': 'root',
                'hostname': 'root.blackmesa.com',
                'port': 1409,
                'prefix': '//lambda/complex/',
                'impl': 'rucio.rse.protocols.xrootd.Default',
                'domains': {
                    'lan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    },
                    'wan': {
                        'read': 1,
                        'write': 1,
                        'delete': 1
                    }
                }
            })

        # register archive
        archive1 = {
            'scope': scope,
            'name': 'cube.1.zip',
            'type': 'FILE',
            'bytes': 2596,
            'adler32': 'beefdead'
        }
        archive2 = {
            'scope': scope,
            'name': 'cube.2.zip',
            'type': 'FILE',
            'bytes': 5432,
            'adler32': 'deadbeef'
        }
        add_replicas(rse_id=rse1_id, files=[archive1], account=root)
        add_replicas(rse_id=rse2_id, files=[archive2], account=root)

        # archived files with replicas, attached to both archives
        archived_file = [{
            'scope': scope.external,
            'name': 'zippedfile-%i-%s' % (i, str(generate_uuid())),
            'type': 'FILE',
            'bytes': 4322,
            'adler32': 'beefbeef'
        } for i in range(2)]
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive1['name'],
                                     files=archived_file)
        self.dc.add_files_to_archive(scope=scope.external,
                                     name=archive2['name'],
                                     files=archived_file)

        # NOTE(review): this first result is immediately overwritten by the
        # metalink call below; kept to preserve the original call sequence
        res = [
            r['pfns'] for r in self.rc.list_replicas(dids=[{
                'scope': f['scope'],
                'name': f['name']
            } for f in archived_file],
                                                     rse_expression=rse1,
                                                     resolve_archives=True)
        ]

        # Restricting to rse1 must only surface the APERTURE archive
        res = self.rc.list_replicas(dids=[{
            'scope': f['scope'],
            'name': f['name']
        } for f in archived_file],
                                    metalink=True,
                                    rse_expression=rse1,
                                    resolve_archives=True)
        assert_in('APERTURE', res)
        assert_not_in('BLACKMESA', res)

        # And restricting to rse2 only the BLACKMESA one
        res = self.rc.list_replicas(dids=[{
            'scope': f['scope'],
            'name': f['name']
        } for f in archived_file],
                                    metalink=True,
                                    rse_expression=rse2,
                                    resolve_archives=True)
        assert_in('BLACKMESA', res)
        assert_not_in('APERTURE', res)
# Script fragment: stat a local file (PREFIX + pfn), compute its adler32,
# and prepare a replica description for OPTIONS.scope:OPTIONS.name unless
# one already exists at OPTIONS.rse. (Fragment is truncated mid-list below.)
try:
    SIZE = os.stat(PREFIX+'/'+OPTIONS.pfn).st_size
    CHECKSUM = adler32(PREFIX+'/'+OPTIONS.pfn)
    # Earlier gfal2-based variant kept for reference:
#    SIZE = GFAL.stat(str(URL)).st_size
#    CHECKSUM = GFAL.checksum(str(URL), 'adler32')
    print("Registering file: pfn=%s size=%s checksum=%s" % (URL, SIZE, CHECKSUM))
#except GError:
# NOTE(review): bare except also swallows SystemExit/KeyboardInterrupt;
# prefer "except OSError:" for the os.stat/read failure this guards
except:
    print("no file found at %s" % URL)
    exit()

R = ReplicaClient()
REPLICAS = list(R.list_replicas([{'scope': OPTIONS.scope, 'name': OPTIONS.name}]))
if REPLICAS:
    # list_replicas yields one record per DID; inspect the first
    REPLICAS = REPLICAS[0]
    if 'rses' in REPLICAS:
        if OPTIONS.rse in REPLICAS['rses']:
            # Already registered at the target RSE: nothing to do
            print("file %s with scope %s has already a replica at %s" %
                  (OPTIONS.name, OPTIONS.scope, OPTIONS.rse))
            exit()

REPLICA = [{
    'scope': OPTIONS.scope,
    'name' : OPTIONS.name,
    'adler32': CHECKSUM,
    'bytes': SIZE,
class TestDIDClients:
    """Client-side tests for data-identifier (DID) listing and creation."""

    def setup(self):
        # One client per Rucio sub-API used by the tests below
        self.account_client = AccountClient()
        self.scope_client = ScopeClient()
        self.meta_client = MetaClient()
        self.did_client = DIDClient()
        self.replica_client = ReplicaClient()
        self.rse_client = RSEClient()

    def test_list_dids(self):
        """ DATA IDENTIFIERS (CLIENT): List dids by pattern."""
        tmp_scope = scope_name_generator()
        tmp_files = []
        tmp_files.append('file_a_1%s' % generate_uuid())
        tmp_files.append('file_a_2%s' % generate_uuid())
        tmp_files.append('file_b_1%s' % generate_uuid())
        tmp_rse = 'MOCK'

        self.scope_client.add_scope('jdoe', tmp_scope)
        for tmp_file in tmp_files:
            self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1,
                                            '0cc737eb')

        # Wildcard patterns of increasing breadth: 2, 1, 2, then all 3 files
        results = []
        for result in self.did_client.list_dids(tmp_scope,
                                                {'name': 'file_a_*'},
                                                type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope,
                                                {'name': 'file_a_1*'},
                                                type='file'):
            results.append(result)
        assert_equal(len(results), 1)
        results = []
        for result in self.did_client.list_dids(tmp_scope,
                                                {'name': 'file_*_1*'},
                                                type='file'):
            results.append(result)
        assert_equal(len(results), 2)
        results = []
        for result in self.did_client.list_dids(tmp_scope, {'name': 'file*'},
                                                type='file'):
            results.append(result)
        assert_equal(len(results), 3)
        results = []

        # A created_after filter in the future-past window yields nothing here
        filters = {
            'name': 'file*',
            'created_after': datetime.utcnow() - timedelta(hours=1)
        }
        for result in self.did_client.list_dids(tmp_scope, filters):
            results.append(result)
        assert_equal(len(results), 0)
        # Unknown DID type must be rejected
        with assert_raises(UnsupportedOperation):
            self.did_client.list_dids(tmp_scope, {'name': 'file*'},
                                      type='whateverytype')

    def test_list_recursive(self):
        """ DATA IDENTIFIERS (CLIENT): List did recursive """
        # Create nested containers and datast
        tmp_scope_1 = 'list-did-recursive'
        tmp_scope_2 = 'list-did-recursive-2'
        self.scope_client.add_scope('root', tmp_scope_1)
        self.scope_client.add_scope('root', tmp_scope_2)

        tmp_container_1 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_1)

        tmp_container_2 = 'container_%s' % generate_uuid()
        self.did_client.add_container(scope=tmp_scope_1, name=tmp_container_2)

        tmp_dataset_1 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_2, name=tmp_dataset_1)

        tmp_dataset_2 = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope_1, name=tmp_dataset_2)

        # Hierarchy: container_1 -> {dataset_1, container_2 -> dataset_2}
        self.did_client.attach_dids(scope=tmp_scope_1,
                                    name=tmp_container_1,
                                    dids=[{
                                        'scope': tmp_scope_2,
                                        'name': tmp_dataset_1
                                    }])
        self.did_client.attach_dids(scope=tmp_scope_1,
                                    name=tmp_container_2,
                                    dids=[{
                                        'scope': tmp_scope_1,
                                        'name': tmp_dataset_2
                                    }])
        self.did_client.attach_dids(scope=tmp_scope_1,
                                    name=tmp_container_1,
                                    dids=[{
                                        'scope': tmp_scope_1,
                                        'name': tmp_container_2
                                    }])

        # List DIDs not recursive - only the first container is expected
        dids = [
            str(did) for did in self.did_client.list_dids(
                scope=tmp_scope_1,
                recursive=False,
                type='all',
                filters={'name': tmp_container_1})
        ]
        assert_equal(dids, [tmp_container_1])

        # List DIDs recursive - first container and all attached collections are expected
        dids = [
            str(did) for did in self.did_client.list_dids(
                scope=tmp_scope_1,
                recursive=True,
                type='all',
                filters={'name': tmp_container_1})
        ]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 in dids)
        assert_true(tmp_dataset_2 in dids)
        assert_equal(len(dids), 4)

        # List DIDs recursive - only containers are expected
        dids = [
            str(did) for did in self.did_client.list_dids(
                scope=tmp_scope_1,
                recursive=True,
                type='container',
                filters={'name': tmp_container_1})
        ]
        assert_true(tmp_container_1 in dids)
        assert_true(tmp_container_2 in dids)
        assert_true(tmp_dataset_1 not in dids)
        assert_true(tmp_dataset_2 not in dids)
        assert_equal(len(dids), 2)

    def test_list_by_length(self):
        """ DATA IDENTIFIERS (CLIENT): List did with length """
        tmp_scope = 'mock'

        tmp_dsn = 'dsn_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)

        # At least the dataset just created matches length > 0 in 'mock'
        dids = self.did_client.list_dids(tmp_scope, {'length.gt': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_not_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {
            'length.gt': -1,
            'length.lt': 1
        })
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

        dids = self.did_client.list_dids(tmp_scope, {'length': 0})
        results = []
        for d in dids:
            results.append(d)
        assert_equal(len(results), 0)

    def test_list_by_metadata(self):
        """ DATA IDENTIFIERS (CLIENT): List did with metadata"""
        dsns = []
        tmp_scope = 'mock'
        tmp_dsn1 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn1)

        # Three datasets sharing project/version, diverging in run/stream/type
        dataset_meta = {'project': 'data12_8TeV',
                        'run_number': 400000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m920',
                        }
        self.did_client.add_dataset(scope=tmp_scope,
                                    name=tmp_dsn1,
                                    meta=dataset_meta)
        tmp_dsn2 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn2)
        dataset_meta['run_number'] = 400001
        self.did_client.add_dataset(scope=tmp_scope,
                                    name=tmp_dsn2,
                                    meta=dataset_meta)

        tmp_dsn3 = 'dsn_%s' % generate_uuid()
        dsns.append(tmp_dsn3)
        dataset_meta['stream_name'] = 'physics_Egamma'
        dataset_meta['datatype'] = 'NTUP_SMWZ'
        self.did_client.add_dataset(scope=tmp_scope,
                                    name=tmp_dsn3,
                                    meta=dataset_meta)

        # Broadest filter matches all three; then progressively narrower ones
        dids = self.did_client.list_dids(tmp_scope, {
            'project': 'data12_8TeV',
            'version': 'f392_m920'
        })
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn1)

        dids = self.did_client.list_dids(tmp_scope, {
            'project': 'data12_8TeV',
            'run_number': 400001
        })
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)
        dsns.remove(tmp_dsn2)

        dids = self.did_client.list_dids(
            tmp_scope, {
                'project': 'data12_8TeV',
                'stream_name': 'physics_Egamma',
                'datatype': 'NTUP_SMWZ'
            })
        results = []
        for d in dids:
            results.append(d)
        for dsn in dsns:
            assert_in(dsn, results)

        # Unknown metadata keys must be rejected
        with assert_raises(KeyNotFound):
            self.did_client.list_dids(tmp_scope,
                                      {'NotReallyAKey': 'NotReallyAValue'})
    def test_add_did(self):
        """ DATA IDENTIFIERS (CLIENT): Add, populate, list did content and create a sample"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        tmp_dsn = 'dsn_%s' % generate_uuid()
        root = InternalAccount('root')
        set_local_account_limit(root, get_rse_id('MOCK'), -1)
        set_local_account_limit(root, get_rse_id('CERN-PROD_TZERO'), -1)

        # PFN example: rfio://castoratlas.cern.ch/castor/cern.ch/grid/atlas/tzero/xx/xx/xx/filename
        dataset_meta = {'project': 'data13_hip',
                        'run_number': 300000,
                        'stream_name': 'physics_CosmicCalo',
                        'prod_step': 'merge',
                        'datatype': 'NTUP_TRIG',
                        'version': 'f392_m927',
                        }
        rules = [{'copies': 1, 'rse_expression': 'MOCK', 'account': 'root'}]

        # Unknown scope must be rejected
        with assert_raises(ScopeNotFound):
            self.did_client.add_dataset(scope='Nimportnawak',
                                        name=tmp_dsn,
                                        statuses={'monotonic': True},
                                        meta=dataset_meta,
                                        rules=rules)

        files = [{'scope': tmp_scope,
                  'name': 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid()),
                  'bytes': 724963570, 'adler32': '0cc737eb'}, ]
        # Files without an RSE cannot be attached at creation time
        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_dataset(scope=tmp_scope,
                                        name=tmp_dsn,
                                        statuses={'monotonic': True},
                                        meta=dataset_meta,
                                        rules=rules,
                                        files=files)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.add_files_to_dataset(scope=tmp_scope,
                                                 name=tmp_dsn,
                                                 files=files)

        files = []
        for i in range(5):
            lfn = 'lfn.%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 10}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})

        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO',
                  'lifetime': timedelta(days=2), 'account': 'root'}]

        # Explicit PFNs are invalid for this (deterministic) RSE
        with assert_raises(InvalidPath):
            self.did_client.add_dataset(scope=tmp_scope,
                                        name=tmp_dsn,
                                        statuses={'monotonic': True},
                                        meta=dataset_meta,
                                        rules=rules,
                                        files=files,
                                        rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'],
                              'bytes': i['bytes'], 'adler32': i['adler32'],
                              'meta': i['meta']} for i in files]
        self.did_client.add_dataset(scope=tmp_scope,
                                    name=tmp_dsn,
                                    statuses={'monotonic': True},
                                    meta=dataset_meta,
                                    rules=rules,
                                    files=files_without_pfn,
                                    rse=tmp_rse)

        # Re-creating the same dataset must fail
        with assert_raises(DataIdentifierAlreadyExists):
            self.did_client.add_dataset(scope=tmp_scope,
                                        name=tmp_dsn,
                                        files=files,
                                        rse=tmp_rse)

        files = []
        for i in range(5):
            lfn = '%(tmp_dsn)s.' % locals() + str(generate_uuid())
            pfn = 'mock://localhost/tmp/rucio_rse/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            # it doesn't work with mock: TBF
            # pfn = 'srm://mock2.com:2880/pnfs/rucio/disk-only/scratchdisk/rucio_tests/%(project)s/%(version)s/%(prod_step)s' % dataset_meta
            pfn += '%(tmp_dsn)s/%(lfn)s' % locals()
            file_meta = {'guid': str(generate_uuid()), 'events': 100}
            files.append({'scope': tmp_scope, 'name': lfn,
                          'bytes': 724963570, 'adler32': '0cc737eb',
                          'pfn': pfn, 'meta': file_meta})
        rules = [{'copies': 1, 'rse_expression': 'CERN-PROD_TZERO',
                  'lifetime': timedelta(days=2)}]

        with assert_raises(InvalidPath):
            self.did_client.add_files_to_dataset(scope=tmp_scope,
                                                 name=tmp_dsn,
                                                 files=files,
                                                 rse=tmp_rse)

        files_without_pfn = [{'scope': i['scope'], 'name': i['name'],
                              'bytes': i['bytes'], 'adler32': i['adler32'],
                              'meta': i['meta']} for i in files]
        self.did_client.add_files_to_dataset(scope=tmp_scope,
                                             name=tmp_dsn,
                                             files=files_without_pfn,
                                             rse=tmp_rse)

        self.did_client.close(scope=tmp_scope, name=tmp_dsn)

        # Sampling 2 files out of the closed dataset into a new one
        tmp_dsn_output = 'dsn_%s' % generate_uuid()
        self.did_client.create_did_sample(input_scope=tmp_scope,
                                          input_name=tmp_dsn,
                                          output_scope=tmp_scope,
                                          output_name=tmp_dsn_output,
                                          nbfiles=2)
        files = [f for f in self.did_client.list_files(scope=tmp_scope,
                                                       name=tmp_dsn_output)]
        assert_equal(len(files), 2)

    def test_attach_dids_to_dids(self):
        """ DATA IDENTIFIERS (CLIENT): Attach dids to dids"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        nb_datasets = 5
        nb_files = 5
        attachments, dsns = list(), list()
        guid_to_query = None
        dsn = {}
        for i in range(nb_datasets):
            attachment = {}
            attachment['scope'] = tmp_scope
            attachment['name'] = 'dsn.%s' % str(generate_uuid())
            attachment['rse'] = tmp_rse
            files = []
            for i in range(nb_files):
                files.append({'scope': tmp_scope,
                              'name': 'lfn.%s' % str(generate_uuid()),
                              'bytes': 724963570, 'adler32': '0cc737eb',
                              'meta': {'guid': str(generate_uuid()),
                                       'events': 100}})
            attachment['dids'] = files
            # Remember one guid to look its dataset up afterwards
            guid_to_query = files[0]['meta']['guid']
            dsn = {'scope': tmp_scope, 'name': attachment['name']}
            dsns.append(dsn)
            attachments.append(attachment)

        self.did_client.add_datasets(dsns=dsns)
        self.did_client.attach_dids_to_dids(attachments=attachments)
        # The last-created dataset must be resolvable from its file's guid
        dsns_l = [i for i in self.did_client.get_dataset_by_guid(guid_to_query)]

        assert_equal([dsn], dsns_l)

        # Attaching files this way to a container is unsupported
        cnt_name = 'cnt_%s' % generate_uuid()
        self.did_client.add_container(scope='mock', name=cnt_name)
        with assert_raises(UnsupportedOperation):
            self.did_client.attach_dids_to_dids([{'scope': 'mock',
                                                  'name': cnt_name,
                                                  'rse': tmp_rse,
                                                  'dids': attachment['dids']}])

    def test_add_files_to_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Add files to Datasets"""
        tmp_scope = 'mock'
        tmp_rse = 'MOCK'
        dsn1 = 'dsn.%s' % str(generate_uuid())
        dsn2 = 'dsn.%s' % str(generate_uuid())
        meta = {'transient': True}
        files1, files2, nb_files = [], [], 5
        for i in range(nb_files):
            files1.append({'scope': tmp_scope,
                           'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()),
                                    'events': 100}})
            files2.append({'scope': tmp_scope,
                           'name': 'lfn.%s' % str(generate_uuid()),
                           'bytes': 724963570, 'adler32': '0cc737eb',
                           'meta': {'guid': str(generate_uuid()),
                                    'events': 100}})

        self.did_client.add_dataset(scope=tmp_scope, name=dsn1,
                                    files=files1, rse=tmp_rse, meta=meta)
        self.did_client.add_dataset(scope=tmp_scope, name=dsn2,
                                    files=files2, rse=tmp_rse, meta=meta)

        # Cross-attach: files2 into dsn1 and files1 into dsn2
        attachments = [{'scope': tmp_scope, 'name': dsn1,
                        'dids': files2, 'rse': tmp_rse},
                       {'scope': tmp_scope, 'name': dsn2,
                        'dids': files1, 'rse': tmp_rse}]

        self.did_client.add_files_to_datasets(attachments)
        files = [f for f in self.did_client.list_files(scope=tmp_scope,
                                                       name=dsn1)]
        assert_equal(len(files), 10)

        # Same attachment again without ignore_duplicate must fail
        with assert_raises(FileAlreadyExists):
            self.did_client.add_files_to_datasets(attachments)

        for attachment in attachments:
            for i in range(nb_files):
                attachment['dids'].append({'scope': tmp_scope,
                                           'name': 'lfn.%s' %
                                           str(generate_uuid()),
                                           'bytes': 724963570,
                                           'adler32': '0cc737eb',
                                           'meta': {'guid':
                                                    str(generate_uuid()),
                                                    'events': 100}})
        # With ignore_duplicate only the 5 new files per dataset are added
        self.did_client.add_files_to_datasets(attachments,
                                              ignore_duplicate=True)
        files = [f for f in self.did_client.list_files(scope=tmp_scope,
                                                       name=dsn1)]
        assert_equal(len(files), 15)

        # Corrupt meta-data
        files = []
        for attachment in attachments:
            for file in attachment['dids']:
                file['bytes'] = 1000
                break

        # Mismatching size against the registered file must be detected
        with assert_raises(FileConsistencyMismatch):
            self.did_client.add_files_to_datasets(attachments,
                                                  ignore_duplicate=True)

    def test_add_dataset(self):
        """ DATA IDENTIFIERS (CLIENT): Add dataset """
        tmp_scope = 'mock'
        tmp_dsn = 'dsn_%s' % generate_uuid()

        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn,
                                    meta={'project': 'data13_hip'})

        did = self.did_client.get_did(tmp_scope, tmp_dsn)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_dsn)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_add_datasets(self):
        """ DATA IDENTIFIERS (CLIENT): Bulk add datasets """
        tmp_scope = 'mock'
        dsns = list()
        for i in range(500):
            tmp_dsn = {'name': 'dsn_%s' % generate_uuid(),
                       'scope': tmp_scope,
                       'meta': {'project': 'data13_hip'}}
            dsns.append(tmp_dsn)
        self.did_client.add_datasets(dsns)

    def test_exists(self):
        """ DATA IDENTIFIERS (CLIENT): Check if data identifier exists """
        tmp_scope = 'mock'
        tmp_file = 'file_%s' % generate_uuid()
        tmp_rse = 'MOCK'

        self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1,
                                        '0cc737eb')

        did = self.did_client.get_did(tmp_scope, tmp_file)

        assert_equal(did['scope'], tmp_scope)
        assert_equal(did['name'], tmp_file)

        with assert_raises(DataIdentifierNotFound):
            self.did_client.get_did('i_dont_exist', 'neither_do_i')

    def test_did_hierarchy(self):
        """ DATA IDENTIFIERS (CLIENT): Check did hierarchy rule """
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(4)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(4)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1,
                                            '0cc737eb')
        for i in range(4):
            self.did_client.add_did(scope, dst[i], 'DATASET', statuses=None,
                                    meta=None, rules=None)
        for i in range(4):
            self.did_client.add_did(scope, cnt[i], 'CONTAINER', statuses=None,
                                    meta=None, rules=None)

        # Two files per dataset
        for i in range(4):
            self.did_client.add_files_to_dataset(scope, dst[i],
                                                 [{'scope': scope,
                                                   'name': file[2 * i],
                                                   'bytes': 1,
                                                   'adler32': '0cc737eb'},
                                                  {'scope': scope,
                                                   'name': file[2 * i + 1],
                                                   'bytes': 1,
                                                   'adler32': '0cc737eb'}])

        self.did_client.add_containers_to_container(scope, cnt[1],
                                                    [{'scope': scope,
                                                      'name': cnt[2]},
                                                     {'scope': scope,
                                                      'name': cnt[3]}])
        self.did_client.add_datasets_to_container(scope, cnt[0],
                                                  [{'scope': scope,
                                                    'name': dst[1]},
                                                   {'scope': scope,
                                                    'name': dst[2]}])

        result = self.did_client.scope_list(scope, recursive=True)
        for r in result:
            pass
            # TODO: fix, fix, fix
            # if r['name'] == cnt[1]:
            #     assert_equal(r['type'], 'container')
            #     assert_equal(r['level'], 0)
            # if (r['name'] == cnt[0]) or (r['name'] == dst[0]) or (r['name'] == file[8]) or (r['name'] == file[9]):
            #     assert_equal(r['level'], 0)
            # else:
            #     assert_equal(r['level'], 1)

    def test_detach_did(self):
        """ DATA IDENTIFIERS (CLIENT): Detach dids from a did"""
        account = 'jdoe'
        rse = 'MOCK'
        scope = scope_name_generator()
        file = ['file_%s' % generate_uuid() for i in range(10)]
        dst = ['dst_%s' % generate_uuid() for i in range(5)]
        cnt = ['cnt_%s' % generate_uuid() for i in range(2)]

        self.scope_client.add_scope(account, scope)

        for i in range(10):
            self.replica_client.add_replica(rse, scope, file[i], 1,
                                            '0cc737eb')
        for i in range(5):
            self.did_client.add_dataset(scope, dst[i], statuses=None,
                                        meta=None, rules=None)
        for i in range(2):
            self.did_client.add_container(scope, cnt[i], statuses=None,
                                          meta=None, rules=None)
        for i in range(5):
            self.did_client.add_files_to_dataset(scope, dst[i],
                                                 [{'scope': scope,
                                                   'name': file[2 * i],
                                                   'bytes': 1,
                                                   'adler32': '0cc737eb'},
                                                  {'scope':
scope, 'name': file[2 * i + 1], 'bytes': 1, 'adler32': '0cc737eb'}]) self.did_client.add_containers_to_container(scope, cnt[1], [{'scope': scope, 'name': dst[2]}, {'scope': scope, 'name': dst[3]}]) with assert_raises(UnsupportedOperation): self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': cnt[1]}]) self.did_client.add_datasets_to_container(scope, cnt[0], [{'scope': scope, 'name': dst[1]}, {'scope': scope, 'name': dst[2]}]) self.did_client.detach_dids(scope, cnt[0], [{'scope': scope, 'name': dst[1]}]) self.did_client.detach_dids(scope, dst[3], [{'scope': scope, 'name': file[6]}, {'scope': scope, 'name': file[7]}]) result = self.did_client.scope_list(scope, recursive=True) for r in result: if r['name'] == dst[1]: assert_equal(r['level'], 0) if r['type'] == 'file': if (r['name'] in file[6:9]): assert_equal(r['level'], 0) else: assert_not_equal(r['level'], 0) with assert_raises(UnsupportedOperation): self.did_client.detach_dids(scope=scope, name=cnt[0], dids=[{'scope': scope, 'name': cnt[0]}]) self.did_client.close(scope, dst[4]) metadata = self.did_client.get_metadata(scope, dst[4]) i_bytes, i_length = metadata['bytes'], metadata['length'] metadata = self.did_client.get_metadata(scope, file[8]) file1_bytes = metadata['bytes'] metadata = self.did_client.get_metadata(scope, file[9]) file2_bytes = metadata['bytes'] self.did_client.detach_dids(scope, dst[4], [{'scope': scope, 'name': file[8]}, {'scope': scope, 'name': file[9]}]) metadata = self.did_client.get_metadata(scope, dst[4]) f_bytes, f_length = metadata['bytes'], metadata['length'] assert_equal(i_bytes, f_bytes + file1_bytes + file2_bytes) assert_equal(i_length, f_length + 1 + 1) def test_scope_list(self): """ DATA IDENTIFIERS (CLIENT): Add, aggregate, and list data identifiers in a scope """ # create some dummy data self.tmp_accounts = ['jdoe' for i in range(3)] self.tmp_scopes = [scope_name_generator() for i in range(3)] self.tmp_rses = 
[rse_name_generator() for i in range(3)] self.tmp_files = ['file_%s' % generate_uuid() for i in range(3)] self.tmp_datasets = ['dataset_%s' % generate_uuid() for i in range(3)] self.tmp_containers = ['container_%s' % generate_uuid() for i in range(3)] # add dummy data to the catalogue for i in range(3): self.scope_client.add_scope(self.tmp_accounts[i], self.tmp_scopes[i]) self.rse_client.add_rse(self.tmp_rses[i]) self.replica_client.add_replica(self.tmp_rses[i], self.tmp_scopes[i], self.tmp_files[i], 1, '0cc737eb') # put files in datasets for i in range(3): for j in range(3): files = [{'scope': self.tmp_scopes[j], 'name': self.tmp_files[j], 'bytes': 1, 'adler32': '0cc737eb'}] self.did_client.add_dataset(self.tmp_scopes[i], self.tmp_datasets[j]) self.did_client.add_files_to_dataset(self.tmp_scopes[i], self.tmp_datasets[j], files) # put datasets in containers for i in range(3): for j in range(3): datasets = [{'scope': self.tmp_scopes[j], 'name': self.tmp_datasets[j]}] self.did_client.add_container(self.tmp_scopes[i], self.tmp_containers[j]) self.did_client.add_datasets_to_container(self.tmp_scopes[i], self.tmp_containers[j], datasets) # reverse check if everything is in order for i in range(3): result = self.did_client.scope_list(self.tmp_scopes[i], recursive=True) r_topdids = [] r_otherscopedids = [] r_scope = [] for r in result: if r['level'] == 0: r_topdids.append(r['scope'] + ':' + r['name']) r_scope.append(r['scope']) if r['scope'] != self.tmp_scopes[i]: r_otherscopedids.append(r['scope'] + ':' + r['name']) assert_in(r['level'], [1, 2]) for j in range(3): assert_equal(self.tmp_scopes[i], r_scope[j]) if j != i: assert_in(self.tmp_scopes[j] + ':' + self.tmp_files[j], r_otherscopedids) assert_not_in(self.tmp_scopes[i] + ':' + self.tmp_files[i], r_topdids) def test_get_did(self): """ DATA IDENTIFIERS (CLIENT): add a new data identifier and try to retrieve it back""" rse = 'MOCK' scope = 'mock' file = generate_uuid() dsn = generate_uuid() 
self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb') did = self.did_client.get_did(scope, file) assert_equal(did['scope'], scope) assert_equal(did['name'], file) self.did_client.add_dataset(scope=scope, name=dsn, lifetime=10000000) did2 = self.did_client.get_did(scope, dsn) assert_equal(type(did2['expired_at']), datetime) def test_get_meta(self): """ DATA IDENTIFIERS (CLIENT): add a new meta data for an identifier and try to retrieve it back""" rse = 'MOCK' scope = 'mock' file = generate_uuid() keys = ['project', 'run_number'] values = ['data13_hip', 12345678] self.replica_client.add_replica(rse, scope, file, 1, '0cc737eb') for i in range(2): self.did_client.set_metadata(scope, file, keys[i], values[i]) meta = self.did_client.get_metadata(scope, file) for i in range(2): assert_equal(meta[keys[i]], values[i]) def test_list_content(self): """ DATA IDENTIFIERS (CLIENT): test to list contents for an identifier""" rse = 'MOCK' scope = 'mock' nbfiles = 5 dataset1 = generate_uuid() dataset2 = generate_uuid() container = generate_uuid() files1 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)] files2 = [{'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'} for i in range(nbfiles)] self.did_client.add_dataset(scope, dataset1) with assert_raises(DataIdentifierAlreadyExists): self.did_client.add_dataset(scope, dataset1) self.did_client.add_files_to_dataset(scope, dataset1, files1, rse=rse) self.did_client.add_dataset(scope, dataset2) self.did_client.add_files_to_dataset(scope, dataset2, files2, rse=rse) self.did_client.add_container(scope, container) datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}] self.did_client.add_datasets_to_container(scope, container, datasets) contents = self.did_client.list_content(scope, container) datasets_s = [d['name'] for d in contents] assert_in(dataset1, datasets_s) assert_in(dataset2, datasets_s) def 
test_list_files(self): """ DATA IDENTIFIERS (CLIENT): List files for a container""" rse = 'MOCK' scope = 'mock' dataset1 = generate_uuid() dataset2 = generate_uuid() container = generate_uuid() files1 = [] files2 = [] for i in range(10): files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) for i in range(10): self.replica_client.add_replica(rse, scope, files1[i]['name'], 1, '0cc737eb') self.replica_client.add_replica(rse, scope, files2[i]['name'], 1, '0cc737eb') self.did_client.add_dataset(scope, dataset1) self.did_client.add_files_to_dataset(scope, dataset1, files1) self.did_client.add_dataset(scope, dataset2) self.did_client.add_files_to_dataset(scope, dataset2, files2) datasets = [{'scope': scope, 'name': dataset1}, {'scope': scope, 'name': dataset2}] self.did_client.add_container(scope, container) self.did_client.add_datasets_to_container(scope, container, datasets) # List file content content = self.did_client.list_files(scope, files1[i]['name']) assert_true(content is not None) for d in content: assert_true(d['name'] == files1[i]['name']) # List container content for d in [{'name': x['name'], 'scope': x['scope'], 'bytes': x['bytes'], 'adler32': x['adler32']} for x in self.did_client.list_files(scope, container)]: assert_in(d, files1 + files2) # List non-existing data identifier content with assert_raises(DataIdentifierNotFound): self.did_client.list_files(scope, 'Nimportnawak') def test_list_replicas(self): """ DATA IDENTIFIERS (CLIENT): List replicas for a container""" rse = 'MOCK' scope = 'mock' dsn1 = generate_uuid() dsn2 = generate_uuid() cnt = generate_uuid() files1 = [] files2 = [] for i in range(10): files1.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) files2.append({'scope': scope, 'name': generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb'}) self.did_client.add_dataset(scope, dsn1) 
self.did_client.add_files_to_dataset(scope, dsn1, files1, rse=rse) self.did_client.add_dataset(scope, dsn2) self.did_client.add_files_to_dataset(scope, dsn2, files2, rse=rse) self.did_client.add_container(scope, cnt) self.did_client.add_datasets_to_container(scope, cnt, [{'scope': scope, 'name': dsn1}, {'scope': scope, 'name': dsn2}]) replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': dsn1}]) assert_true(replicas is not None) replicas = self.replica_client.list_replicas(dids=[{'scope': scope, 'name': cnt}]) assert_true(replicas is not None) @raises(UnsupportedOperation) def test_close(self): """ DATA IDENTIFIERS (CLIENT): test to close data identifiers""" tmp_rse = 'MOCK' tmp_scope = 'mock' # Add dataset tmp_dataset = 'dsn_%s' % generate_uuid() # Add file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb') # Add dataset self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset) # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Add a second file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Close dataset with assert_raises(UnsupportedStatus): self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False) self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False) # Add a third file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': 
'0cc737eb'}, ] self.did_client.attach_dids(scope=tmp_scope, name=tmp_dataset, dids=files) @raises def test_open(self): """ DATA IDENTIFIERS (CLIENT): test to re-open data identifiers for priv account""" tmp_rse = 'MOCK' tmp_scope = 'mock' # Add dataset tmp_dataset = 'dsn_%s' % generate_uuid() # Add file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(rse=tmp_rse, scope=tmp_scope, name=tmp_file, bytes=1, adler32='0cc737eb') # Add dataset self.did_client.add_dataset(scope=tmp_scope, name=tmp_dataset) # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Add a second file replica tmp_file = 'file_%s' % generate_uuid() self.replica_client.add_replica(tmp_rse, tmp_scope, tmp_file, 1, '0cc737eb') # Add files to dataset files = [{'scope': tmp_scope, 'name': tmp_file, 'bytes': 1, 'adler32': '0cc737eb'}, ] self.did_client.add_files_to_dataset(scope=tmp_scope, name=tmp_dataset, files=files) # Close dataset with assert_raises(UnsupportedStatus): self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, close=False) self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=False) # Add a third file replica self.did_client.set_status(scope=tmp_scope, name=tmp_dataset, open=True) def test_bulk_get_meta(self): """ DATA IDENTIFIERS (CLIENT): Add a new meta data for a list of DIDs and try to retrieve them back""" key = 'project' rse = 'MOCK' scope = 'mock' files = ['file_%s' % generate_uuid() for _ in range(4)] dst = ['dst_%s' % generate_uuid() for _ in range(4)] cnt = ['cnt_%s' % generate_uuid() for _ in range(4)] meta_mapping = {} list_dids = [] for idx in range(4): self.replica_client.add_replica(rse, scope, files[idx], 1, '0cc737eb') self.did_client.set_metadata(scope, files[idx], key, 'file_%s' % idx) list_dids.append({'scope': scope, 'name': files[idx]}) meta_mapping['%s:%s' % (scope, 
files[idx])] = (key, 'file_%s' % idx) for idx in range(4): self.did_client.add_did(scope, dst[idx], 'DATASET', statuses=None, meta={key: 'dsn_%s' % idx}, rules=None) list_dids.append({'scope': scope, 'name': dst[idx]}) meta_mapping['%s:%s' % (scope, dst[idx])] = (key, 'dsn_%s' % idx) for idx in range(4): self.did_client.add_did(scope, cnt[idx], 'CONTAINER', statuses=None, meta={key: 'cnt_%s' % idx}, rules=None) list_dids.append({'scope': scope, 'name': cnt[idx]}) meta_mapping['%s:%s' % (scope, cnt[idx])] = (key, 'cnt_%s' % idx) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] res_list_dids = [{'scope': entry['scope'], 'name': entry['name']} for entry in list_meta] res_list_dids.sort() list_dids.sort() assert_equal(list_dids, res_list_dids) for meta in list_meta: did = '%s:%s' % (meta['scope'], meta['name']) met = meta_mapping[did] assert_equal((key, meta[key]), met) cnt = ['cnt_%s' % generate_uuid() for _ in range(4)] for idx in range(4): list_dids.append({'scope': scope, 'name': cnt[idx]}) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] assert_equal(len(list_meta), 12) list_dids = [] for idx in range(4): list_dids.append({'scope': scope, 'name': cnt[idx]}) list_meta = [_ for _ in self.did_client.get_metadata_bulk(list_dids)] assert_equal(len(list_meta), 0)
class TestReplicaClients:
    """Integration tests for the Rucio replica client and replica REST endpoints.

    These tests run against a live Rucio server with the MOCK/MOCK2/MOCK3 RSEs
    configured; they are not unit-testable in isolation.
    """

    def setup(self):
        # Fresh clients per test (nose-style setup)
        self.replica_client = ReplicaClient()
        self.did_client = DIDClient()

    def test_add_list_bad_replicas(self):
        """ REPLICA (CLIENT): Add bad replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK')
        rse_id1 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK', files=files)
        # Listing replicas on deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        # Declaring known PFNs bad must return an empty error dict
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})
        # Every declared replica must now show up in the bad-replica listing
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id1:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)
        # Run necromancer once
        run(threads=1, bulk=10000, once=True)
        # Try to attach a lost file
        tmp_dsn = 'dataset_%s' % generate_uuid()
        self.did_client.add_dataset(scope=tmp_scope, name=tmp_dsn)
        with assert_raises(UnsupportedOperation):
            self.did_client.add_files_to_dataset(tmp_scope, name=tmp_dsn, files=files, rse='MOCK')
        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        rse_info = rsemgr.get_rse_info('MOCK2')
        rse_id2 = rse_info['id']
        self.replica_client.add_replicas(rse='MOCK2', files=files)
        # Listing replicas on non-deterministic RSE
        replicas, list_rep = [], []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        print(replicas, list_rep)
        r = self.replica_client.declare_bad_file_replicas(replicas, 'This is a good reason')
        print(r)
        assert_equal(r, {})
        bad_replicas = list_bad_replicas()
        nbbadrep = 0
        for rep in list_rep:
            for badrep in bad_replicas:
                if badrep['rse_id'] == rse_id2:
                    if badrep['scope'] == rep['scope'] and badrep['name'] == rep['name']:
                        nbbadrep += 1
        assert_equal(len(replicas), nbbadrep)
        # Now adding non-existing bad replicas
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_bad_file_replicas(files, 'This is a good reason')
        # Unknown PFNs are reported back per RSE with an 'Unknown replica' marker
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_add_suspicious_replicas(self):
        """ REPLICA (CLIENT): Add suspicious replicas"""
        tmp_scope = 'mock'
        nbfiles = 5
        # Adding replicas to deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        # Listing replicas on deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})
        # Adding replicas to non-deterministic RSE
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb',
                  'pfn': 'srm://mock2.com:8443/srm/managerv2?SFN=/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK2', files=files)
        # Listing replicas on non-deterministic RSE
        replicas = []
        list_rep = []
        for replica in self.replica_client.list_replicas(dids=[{'scope': f['scope'], 'name': f['name']} for f in files], schemes=['srm'], unavailable=True):
            replicas.extend(replica['rses']['MOCK2'])
            list_rep.append(replica)
        r = self.replica_client.declare_suspicious_file_replicas(replicas, 'This is a good reason')
        assert_equal(r, {})
        # Now adding non-existing bad replicas
        files = ['srm://mock2.com/rucio/tmpdisk/rucio_tests/%s/%s' % (tmp_scope, generate_uuid()), ]
        r = self.replica_client.declare_suspicious_file_replicas(files, 'This is a good reason')
        output = ['%s Unknown replica' % rep for rep in files]
        assert_equal(r, {'MOCK2': output})

    def test_bad_replica_methods_for_UI(self):
        """ REPLICA (REST): Test the listing of bad and suspicious replicas """
        mw = []
        # Authenticate via userpass and reuse the token for the replica endpoints
        headers1 = {'X-Rucio-Account': 'root', 'X-Rucio-Username': '******', 'X-Rucio-Password': '******'}
        r1 = TestApp(auth_app.wsgifunc(*mw)).get('/userpass', headers=headers1, expect_errors=True)
        assert_equal(r1.status, 200)
        token = str(r1.header('X-Rucio-Auth-Token'))
        headers2 = {'X-Rucio-Auth-Token': str(token)}
        # All bad/suspicious states, unfiltered
        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_files.append(dumps(line))
        nb_tot_files = len(tot_files)
        # Only state 'B' (bad)
        data = dumps({'state': 'B'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files1 = len(tot_bad_files)
        # Only state 'S' (suspicious), with PFNs listed
        data = dumps({'state': 'S', 'list_pfns': 'True'})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_suspicious_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_suspicious_files.append(dumps(line))
        nb_tot_suspicious_files = len(tot_suspicious_files)
        # Bad + suspicious must account for every listed replica
        assert_equal(nb_tot_files, nb_tot_bad_files1 + nb_tot_suspicious_files)
        # Nothing can have been declared bad later than tomorrow
        tomorrow = datetime.utcnow() + timedelta(days=1)
        data = dumps({'state': 'B', 'younger_than': tomorrow.isoformat()})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/states', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        tot_bad_files = []
        for line in r2.body.split('\n'):
            if line != '':
                tot_bad_files.append(dumps(line))
        nb_tot_bad_files = len(tot_bad_files)
        assert_equal(nb_tot_bad_files, 0)
        # The per-RSE summary 'BAD' counts must agree with the state-'B' listing
        data = dumps({})
        r2 = TestApp(rep_app.wsgifunc(*mw)).get('/bad/summary', headers=headers2, params=data, expect_errors=True)
        assert_equal(r2.status, 200)
        nb_tot_bad_files2 = 0
        for line in r2.body.split('\n'):
            if line != '':
                line = loads(line)
                nb_tot_bad_files2 += int(line['BAD'])
        assert_equal(nb_tot_bad_files1, nb_tot_bad_files2)

    def test_add_list_replicas(self):
        """ REPLICA (CLIENT): Add, change state and list file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files1 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files1)
        files2 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files2)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files1])]
        assert_equal(len(replicas), len(files1))
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['file'])]
        assert_equal(len(replicas), 5)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files2], schemes=['srm'])]
        assert_equal(len(replicas), 5)
        # Replicas added in state 'U' (unavailable) are hidden from a default listing
        files3 = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'state': 'U', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK3', files=files3)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'])]
        for i in range(nbfiles):
            assert_equal(replicas[i]['rses'], {})
        # Flip them to 'A' (available); they must then be listed on MOCK3
        files4 = []
        for file in files3:
            file['state'] = 'A'
            files4.append(file)
        self.replica_client.update_replicas_states('MOCK3', files=files4)
        replicas = [r for r in self.replica_client.list_replicas(dids=[{'scope': i['scope'], 'name': i['name']} for i in files3], schemes=['file'], unavailable=True)]
        assert_equal(len(replicas), 5)
        for i in range(nbfiles):
            assert_in('MOCK3', replicas[i]['rses'])

    def test_delete_replicas(self):
        """ REPLICA (CLIENT): Add and delete file replicas """
        tmp_scope = 'mock'
        nbfiles = 5
        files = [{'scope': tmp_scope, 'name': 'file_%s' % generate_uuid(), 'bytes': 1, 'adler32': '0cc737eb', 'meta': {'events': 10}} for i in range(nbfiles)]
        self.replica_client.add_replicas(rse='MOCK', files=files)
        # A plain (non-root) account may not delete replicas
        with assert_raises(AccessDenied):
            self.replica_client.delete_replicas(rse='MOCK', files=files)