def add_rule(self, run_number, dtype, hash, from_rse, to_rse, datum=None, lifetime=None, update_db=True):
    did = make_did(run_number, dtype, hash)

    if dtype in self.HIGH_LEVEL_TYPES:
        priority = 1
    else:
        priority = 3

    result = self.rc.AddConditionalRule(did, from_rse, to_rse, lifetime=lifetime, priority=priority)
    # if result == 1:
    #     return

    helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}: conditional rule added: {2} ---> {3}'.format(run_number, dtype, did, to_rse))

    if update_db:
        self.db.db.find_one_and_update({'number': run_number},
                                       {'$set': {'status': 'transferring'}})

        rucio_rule = self.rc.GetRule(did, rse=to_rse)
        updated_fields = {'host': "rucio-catalogue",
                          'type': dtype,
                          'location': to_rse,
                          'lifetime': rucio_rule['expires'],
                          'status': 'transferring',
                          'did': did,
                          'protocol': 'rucio'}

        if datum is None:
            data_dict = updated_fields
        else:
            data_dict = datum.copy()
            data_dict.update(updated_fields)

        docid = self.db.db.find_one({'number': run_number}, {'_id': 1})['_id']
        self.db.AddDatafield(docid, data_dict)
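# A minimal sketch (not part of the original code) of how the datum merge in
# add_rule behaves: the Rucio-specific updated_fields overwrite any same-named
# keys from the EB datum, while extra EB metadata is carried over. All field
# values here are illustrative.
def _example_datum_merge():
    datum = {'type': 'raw_records', 'status': 'eb_ready', 'file_count': 128}
    updated_fields = {'status': 'transferring', 'protocol': 'rucio'}
    data_dict = datum.copy()
    data_dict.update(updated_fields)  # updated_fields wins on key conflicts
    # -> {'type': 'raw_records', 'status': 'transferring',
    #     'file_count': 128, 'protocol': 'rucio'}
    return data_dict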
def add_rule(run_number, dtype, hash, rse, lifetime=None, update_db=True):
    did = make_did(run_number, dtype, hash)
    rc = RucioSummoner()
    result = rc.AddRule(did, rse, lifetime=lifetime)
    # if result == 1:
    #     return
    print(f"Rule Added: {did} ---> {rse}")

    if update_db:
        rucio_rule = rc.GetRule(did, rse=rse)
        data_dict = {'host': "rucio-catalogue",
                     'type': dtype,
                     'location': rse,
                     'lifetime': rucio_rule['expires'],
                     'status': 'transferring',
                     'did': did,
                     'protocol': 'rucio'}

        DB.db.find_one_and_update({'number': run_number},
                                  {'$set': {'status': 'transferring'}})

        docid = DB.db.find_one({'number': run_number}, {'_id': 1})['_id']
        DB.AddDatafield(docid, data_dict)
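# Hedged sketch of the DID convention assumed throughout these functions: a
# run-number-based scope plus a '<dtype>-<hash>' name. The authoritative format
# lives in make_did; this stand-in is illustrative only.
def _example_make_did(run_number, dtype, hash):
    return 'xnt_{:06d}:{}-{}'.format(run_number, dtype, hash)
    # e.g. _example_make_did(10800, 'raw_records', 'abcd1234')
    #      -> 'xnt_010800:raw_records-abcd1234'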
def run(self, *args, **kwargs):
    # helper.global_dictionary['logger'].Info(f'Run task {self.__class__.__name__}')

    # Get a new dataset to upload
    id_to_upload, datum = self.get_dataset_to_upload_from_manager()
    if id_to_upload == 0:
        # helper.global_dictionary['logger'].Info('\t==> No data type available to upload')
        return 0

    # Get the run
    run = self.db.db.find_one({'_id': id_to_upload}, {'number': 1, 'data': 1})

    # Get info from the screen session
    process = psutil.Process()
    screen = process.parent().parent().parent().parent().cmdline()[-1]

    # Build the DID
    number = run['number']
    dtype = datum['type']
    file = datum['location'].split('/')[-1]
    hash = file.split('-')[-1]
    did = make_did(number, dtype, hash)

    eb = datum['host'].split('.')[0]

    helper.global_dictionary['logger'].Info('\t==> Screen {0}. Uploading did {1} from host {2}'.format(screen, did, eb))

    # Modify data type status to "transferring"
    self.db.db.find_one_and_update({'_id': id_to_upload,
                                    'data': {'$elemMatch': {'type': datum['type'],
                                                            'location': datum['location'],
                                                            'host': datum['host']}}},
                                   {'$set': {"data.$.status": "transferring"}})

    # Check, for coherence, that there is no Rucio entry in the DB for this data type
    in_rucio_upload_rse = False
    in_rucio_somewhere_else = False
    for d in run['data']:
        if d['type'] == datum['type'] and d['host'] == 'rucio-catalogue' and hash in d['did'] and d['location'] == self.UPLOAD_TO:
            in_rucio_upload_rse = True
        if d['type'] == datum['type'] and d['host'] == 'rucio-catalogue' and hash in d['did'] and d['location'] != self.UPLOAD_TO:
            in_rucio_somewhere_else = True

    if in_rucio_upload_rse:
        helper.global_dictionary['logger'].Info('\t==> Screen {0}. Run {1}, data type {2} already has a DB entry for RSE {3}. Forced to stop'.format(screen, number, dtype, self.UPLOAD_TO))
        self.reset_upload_to_manager()
        return 0

    if in_rucio_somewhere_else:
        helper.global_dictionary['logger'].Info('\t==> Screen {0}. Run {1}, data type {2} already has a DB entry for some external RSE. Forced to stop'.format(screen, number, dtype))
        self.reset_upload_to_manager()
        return 0

    # Query Rucio: if a rule already exists for this DID on LNGS, skip uploading
    rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
    if rucio_rule['exists']:
        helper.global_dictionary['logger'].Info('\t==> Screen {0}. Run {1}, data type {2} already has a Rucio rule for RSE {3}. Forced to stop'.format(screen, number, dtype, self.UPLOAD_TO))
        self.reset_upload_to_manager()
        return 0

    # Build the full path of data to upload
    upload_path = os.path.join(self.DATADIR, eb, file)

    # Finally, start uploading with Rucio
    result = self.rc.Upload(did, upload_path, self.UPLOAD_TO, lifetime=None)
    helper.global_dictionary['logger'].Info('\t==> Screen {0}. Uploading did {1} from host {2} done'.format(screen, did, eb))

    # Wait for 10 seconds
    time.sleep(10)

    # Check the status of this new upload rule
    rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
    if rucio_rule['state'] != 'OK':
        helper.global_dictionary['logger'].Info('\t==> Screen {0}. Run {1}, data type {2}: according to Rucio, uploading failed. Forced to stop'.format(screen, number, dtype))
        exit()

    # Modify data type status to "transferred"
    self.db.db.find_one_and_update({'_id': id_to_upload,
                                    'data': {'$elemMatch': {'type': datum['type'],
                                                            'location': datum['location'],
                                                            'host': datum['host']}}},
                                   {'$set': {"data.$.status": "transferred"}})

    # Add a new data field with LNGS as RSE and with status "transferred"
    data_dict = datum.copy()
    data_dict.update({'host': "rucio-catalogue",
                      'type': dtype,
                      'location': self.UPLOAD_TO,
                      'lifetime': rucio_rule['expires'],
                      'status': 'transferred',
                      'did': did,
                      'protocol': 'rucio'})
    self.db.AddDatafield(run['_id'], data_dict)

    # Set a chain of rules to ship data to the GRID
    if rucio_rule['state'] == 'OK':
        rses = [self.UPLOAD_TO]

        if dtype in self.RAW_RECORDS_TPC_TYPES:
            rses = rses + self.RAW_RECORDS_TPC_RSES
        if dtype in self.RAW_RECORDS_MV_TYPES:
            rses = rses + self.RAW_RECORDS_MV_RSES
        if dtype in self.RAW_RECORDS_NV_TYPES:
            rses = rses + self.RAW_RECORDS_NV_RSES

        if dtype in self.LIGHT_RAW_RECORDS_TPC_TYPES:
            rses = rses + self.LIGHT_RAW_RECORDS_TPC_RSES
        if dtype in self.LIGHT_RAW_RECORDS_MV_TYPES:
            rses = rses + self.LIGHT_RAW_RECORDS_MV_RSES
        if dtype in self.LIGHT_RAW_RECORDS_NV_TYPES:
            rses = rses + self.LIGHT_RAW_RECORDS_NV_RSES

        if dtype in self.HIGH_LEVEL_TYPES:
            rses = rses + self.HIGH_LEVEL_RSES
        if dtype in self.RECORDS_TYPES:
            rses = rses + self.RECORDS_RSES

        for from_rse, to_rse in zip(rses, rses[1:]):
            to_rule = self.rc.GetRule(upload_structure=did, rse=to_rse)
            if not to_rule['exists']:
                self.add_rule(number, dtype, hash, from_rse, to_rse, datum=datum)

    # Unbook the DID
    self.reset_upload_to_manager()

    return 0
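# A short sketch (assumed names, not the original code) of the rule-chaining
# pattern used above: zip(rses, rses[1:]) pairs each RSE with its successor,
# so data hop from the upload RSE through the GRID sites one conditional rule
# at a time. 'NIKHEF_USERDISK' here is illustrative only.
def _example_rule_chain():
    rses = ['LNGS_USERDISK', 'UC_OSG_USERDISK', 'NIKHEF_USERDISK']
    return list(zip(rses, rses[1:]))
    # -> [('LNGS_USERDISK', 'UC_OSG_USERDISK'),
    #     ('UC_OSG_USERDISK', 'NIKHEF_USERDISK')]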
def run(self, *args, **kwargs):
    helper.global_dictionary['logger'].Info(f'Run task {self.__class__.__name__}')

    data_types = (self.RAW_RECORDS_TPC_TYPES + self.RAW_RECORDS_MV_TYPES
                  + self.RAW_RECORDS_NV_TYPES + self.LIGHT_RAW_RECORDS_TPC_TYPES
                  + self.LIGHT_RAW_RECORDS_MV_TYPES + self.LIGHT_RAW_RECORDS_NV_TYPES
                  + self.HIGH_LEVEL_TYPES + self.RECORDS_TYPES)

    # Get all runs that are already transferred and that still have some data_types in EB
    cursor = self.db.db.find(
        {
            # 'number': {"$lt": 7600, "$gte": 7500},
            # 'number': {"$lt": 7600, "$gte": 7200},
            # 'number': {"$lt": 8570, "$gte": 8550},
            # 'number': {"$gte": 7330},
            # 'number': {"$gte": 8500},
            'number': {"$gte": 10800},
            # 'number': {"$gte": 8013},
            # 'number': 8075,
            # 'data': {"$elemMatch": {"host": {"$regex": ".*eb.*"}, "type": {"$in": data_types}}},
            # 'status': 'transferred'
            'status': {'$in': ['transferred', 'transferring']}
        },
        {'_id': 1, 'number': 1, 'data': 1, 'bootstrax': 1})
    cursor = list(cursor)

    # helper.global_dictionary['logger'].Info('Runs that will be processed are {0}'.format([c["number"] for c in cursor]))
    helper.global_dictionary['logger'].Info('Runs that will be processed are {0}'.format(len(cursor)))

    # Loop over all listed runs
    for run in cursor:

        # Get the run number
        number = run['number']

        # Extract the Event Builder machine that processed this run
        bootstrax = run['bootstrax']
        eb = bootstrax['host'].split('.')[0]

        # helper.global_dictionary['logger'].Info('Treating run {0}'.format(number))
        # helper.global_dictionary['logger'].Info('Run {0} has been processed by {1}'.format(number, eb))

        # Check how old the data are
        if 'time' in run['bootstrax']:
            run_time = run['bootstrax']['time'].replace(tzinfo=timezone.utc)
            now_time = datetime.now().replace(tzinfo=timezone.utc)
            delta_time = now_time - run_time
        else:
            delta_time = timedelta(days=self.minimum_deltadays_allowed)

        # Loop over all data types that have to be cleaned
        for dtype in data_types:

            # helper.global_dictionary['logger'].Info('\t==> Looking for data type {0}'.format(dtype))

            # Check the age of the data type
            is_enough_old = True

            # For some data types, if they are not yet older than three days, skip deleting them
            if dtype in self.dtype_delayed_delete:
                if delta_time < timedelta(days=self.minimum_deltadays_allowed):
                    helper.global_dictionary['logger'].Info('Run {0}, data type {1} is not yet older than {2} days. Skip it'.format(number, dtype, self.minimum_deltadays_allowed))
                    is_enough_old = False

            # For some heavy data types (records and raw_records), if they are not yet older than one day, skip deleting them
            if dtype in self.dtype_delayed_delete_heavy:
                if delta_time < timedelta(days=self.minimum_deltadays_allowed_heavy):
                    helper.global_dictionary['logger'].Info('Run {0}, data type {1} is not yet older than {2} days. Skip it'.format(number, dtype, self.minimum_deltadays_allowed_heavy))
                    is_enough_old = False

            # Check first with the runDB if the data type already exists in external RSEs
            rses_in_db = []
            for d in run['data']:
                if d['type'] == dtype and d['host'] == 'rucio-catalogue' and d['location'] != self.UPLOAD_TO and d['status'] == 'transferred':
                    rses_in_db.append(d['location'])
            # helper.global_dictionary['logger'].Info('\t==> According to DB, found in following external RSEs : {0}'.format(rses_in_db))

            # If this is not the case, skip any attempt at deleting anything
            if len(rses_in_db) < self.minimum_number_acceptable_rses:
                # helper.global_dictionary['logger'].Info('\t==> Nothing will be deleted : not enough external RSEs')
                continue

            # Check first if the data are, according to the DB, still in EB
            datum = None
            for d in run['data']:
                if d['type'] == dtype and eb in d['host']:
                    datum = d

            # Skip this data type in case the EB status field is missing or different from "transferred"
            if datum is not None and ('status' not in datum or datum['status'] != "transferred"):
                continue

            #
            # Phase 1 : Deleting data in EB
            #

            # if datum is None:
            #     helper.global_dictionary['logger'].Info('Data type not in eb')

            # Start deleting data in EB
            if datum is not None and dtype not in self.dtype_never_delete and is_enough_old:
                file = datum['location'].split('/')[-1]
                hash = file.split('-')[-1]

                # Create the DID from the DB
                did = make_did(number, dtype, hash)

                # Check if a rule already exists with this exact DID in external RSEs,
                # and take also the number of files in each RSE
                rses_with_rule = []
                rses_with_correct_nfiles = []
                for rse in self.RSES:
                    rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)
                    if rucio_rule['exists'] and rucio_rule['state'] == 'OK':
                        if self.UPLOAD_TO == rucio_rule['rse']:
                            continue
                        rses_with_rule.append(rucio_rule['rse'])
                        nfiles = len(list_file_replicas(number, dtype, hash, rucio_rule['rse']))
                        if 'file_count' in datum:
                            if nfiles == datum['file_count']:
                                rses_with_correct_nfiles.append(rucio_rule['rse'])

                # helper.global_dictionary['logger'].Info('\t==> According to Rucio, found in following external RSEs : {0}'.format(rses_with_rule))
                # if len(rses_with_correct_nfiles) == len(rses_with_rule):
                #     helper.global_dictionary['logger'].Info('\t==> All of them with the expected number of files')
                # else:
                #     helper.global_dictionary['logger'].Info('\t==> Error, these RSEs have wrong number of files : {0}'.format(rses_with_correct_nfiles))

                # If so, start deleting
                # if len(rses_with_rule) >= self.minimum_number_acceptable_rses and len(rses_with_correct_nfiles) == len(rses_with_rule):
                if (len(rses_with_rule) >= self.minimum_number_acceptable_rses
                        and len(rses_with_correct_nfiles) >= self.minimum_number_acceptable_rses):
                    # if len(rses_with_rule) >= self.minimum_number_acceptable_rses:

                    # Delete from DB
                    # print(run['_id'], datum['type'], datum['host'])
                    self.db.RemoveDatafield(run['_id'], datum)
                    full_path = os.path.join(self.DATADIR, eb, file)
                    # print(full_path)
                    helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}. Deleted EB info from DB'.format(number, dtype))

                    # Delete from disk
                    try:
                        shutil.rmtree(full_path)
                    except OSError as e:
                        helper.global_dictionary['logger'].Info('\t==> Error, cannot delete directory : {0}'.format(e))
                    else:
                        helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}. Deleted data from EB disk'.format(number, dtype))

            #
            # Phase 2 : Deleting data in LNGS_USERDISK
            #

            # Check if the data are, according to the DB, still in the datamanager (LNGS_USERDISK)
            datum = None
            for d in run['data']:
                if d['type'] == dtype and d['host'] == 'rucio-catalogue' and self.UPLOAD_TO in d['location']:
                    datum = d

            # If so, start deleting data in the datamanager (LNGS_USERDISK)
            # if datum is None:
            #     helper.global_dictionary['logger'].Info('Data type not in LNGS_USERDISK')
            if datum is not None:

                # Create the DID from the DB
                did = datum['did']
                hash = did.split('-')[-1]
                nfiles_upload_to = len(list_file_replicas(number, dtype, hash, self.UPLOAD_TO))

                # Check if a rule already exists with this exact DID in external RSEs
                rses_with_rule = []
                rses_with_correct_nfiles = []
                for rse in self.RSES:
                    rucio_rule = self.rc.GetRule(upload_structure=did, rse=rse)
                    if rucio_rule['exists'] and rucio_rule['state'] == 'OK':
                        if self.UPLOAD_TO == rucio_rule['rse']:
                            continue
                        rses_with_rule.append(rucio_rule['rse'])
                        nfiles = len(list_file_replicas(number, dtype, hash, rucio_rule['rse']))
                        if nfiles == nfiles_upload_to:
                            rses_with_correct_nfiles.append(rucio_rule['rse'])

                # helper.global_dictionary['logger'].Info('\t==> According to Rucio, found in following external RSEs : {0}'.format(rses_with_rule))
                # if len(rses_with_correct_nfiles) == len(rses_with_rule):
                #     helper.global_dictionary['logger'].Info('\t==> All of them with the expected number of files')
                # else:
                #     helper.global_dictionary['logger'].Info('\t==> Error, these RSEs have wrong number of files : {0}'.format(rses_with_correct_nfiles))

                # If so, start deleting
                # if len(rses_with_rule) >= self.minimum_number_acceptable_rses and len(rses_with_correct_nfiles) == len(rses_with_rule):
                if (len(rses_with_rule) >= self.minimum_number_acceptable_rses
                        and len(rses_with_correct_nfiles) >= self.minimum_number_acceptable_rses):
                    rucio_rule = self.rc.GetRule(upload_structure=did, rse=self.UPLOAD_TO)
                    if rucio_rule['exists'] and rucio_rule['state'] == 'OK' and rucio_rule['rse'] == self.UPLOAD_TO:
                        self.rc.DeleteRule(rucio_rule['id'])
                        helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}. Deleted LNGS_USERDISK Rucio rule'.format(number, dtype))

                        hash = did.split('-')[-1]
                        files = list_file_replicas(number, dtype, hash, "LNGS_USERDISK")
                        for file in files:
                            os.remove(file)
                        helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}. Deleted data from LNGS_USERDISK disk'.format(number, dtype))

                        self.db.RemoveDatafield(run['_id'], datum)
                        helper.global_dictionary['logger'].Info('\t==> Run {0}, data type {1}. Deleted LNGS_USERDISK info from DB'.format(number, dtype))

    return 0
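# Hedged sketch of the deletion safety gate applied twice above: a local copy
# is removed only when at least `minimum` external RSEs hold an OK rule AND at
# least `minimum` of them carry the expected file count. Names and the default
# threshold are illustrative stand-ins for the class attributes.
def _example_safe_to_delete(rses_with_rule, rses_with_correct_nfiles, minimum=2):
    return (len(rses_with_rule) >= minimum
            and len(rses_with_correct_nfiles) >= minimum)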
def do_upload(periodic_check=300):
    # rc_reader_path = "/home/datamanager/software/admix/admix/config/xenonnt_format.config"
    # rc_reader = ConfigRucioDataFormat()
    # rc_reader.Config(rc_reader_path)
    rc = RucioSummoner()

    # Get the data to upload
    ids_to_upload = find_data_to_upload()

    cursor = DB.db.find(
        {
            '_id': {"$in": ids_to_upload},
            # 'number': 7157
        },
        {'number': 1, 'data': 1, 'dids': 1})
    cursor = list(cursor)

    # Check transfers
    check_transfers()
    last_check = time.time()

    for run in cursor:
        number = run['number']
        print(f"\n\nUploading run {number}")
        for dtype in DTYPES:
            print(f"\t==> Uploading {dtype}")

            # Get the datum for this data type
            datum = None
            in_rucio = False
            for d in run['data']:
                if d['type'] == dtype and 'eb' in d['host']:
                    datum = d
                if d['type'] == dtype and d['host'] == 'rucio-catalogue':
                    in_rucio = True

            if datum is None:
                print(f"Data type {dtype} not found for run {number}")
                continue

            file = datum['location'].split('/')[-1]
            hash = file.split('-')[-1]

            upload_path = os.path.join(DATADIR, file)

            # Create a DID to upload
            did = make_did(number, dtype, hash)

            # Check if a rule already exists for this DID on LNGS
            rucio_rule = rc.GetRule(upload_structure=did, rse="LNGS_USERDISK")

            # If not in Rucio already and no rule exists, upload into Rucio
            if not in_rucio and not rucio_rule['exists']:
                result = rc.Upload(did, upload_path, 'LNGS_USERDISK', lifetime=None)
                print("Dataset uploaded.")

            # If the upload was successful, tell the runDB
            rucio_rule = rc.GetRule(upload_structure=did, rse="LNGS_USERDISK")
            data_dict = {'host': "rucio-catalogue",
                         'type': dtype,
                         'location': 'LNGS_USERDISK',
                         'lifetime': rucio_rule['expires'],
                         'status': 'transferred',
                         'did': did,
                         'protocol': 'rucio'}

            if rucio_rule['state'] == 'OK':
                if not in_rucio:
                    DB.AddDatafield(run['_id'], data_dict)

                # add a DID list that's easy to query by DB.GetDid
                # check if the dids field exists yet or not
                # if not run.get('dids'):
                #     DB.db.find_one_and_update({'_id': run['_id']},
                #                               {'$set': {'dids': {dtype: did}}})
                # else:
                #     print("Updating DID list")
                #     DB.db.find_one_and_update({'_id': run['_id']},
                #                               {'$set': {'dids.%s' % dtype: did}})

                # Add rule to OSG and Nikhef
                # TODO make this configurable
                for rse in ['UC_OSG_USERDISK']:
                    add_rule(number, dtype, hash, rse)

                # Finally, delete the EB copy
                # remove_from_eb(number, dtype)

            if time.time() - last_check > periodic_check:
                check_transfers()
                last_check = time.time()
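# Hedged sketch of the throttling pattern used in do_upload: an expensive
# status check runs at most once every `periodic_check` seconds while the main
# loop keeps processing. `items`, `process`, and `check` are hypothetical
# stand-ins for the run cursor, the per-run work, and check_transfers.
import time

def _example_throttled_loop(items, process, check, periodic_check=300):
    last_check = time.time()
    for item in items:
        process(item)
        # Only re-run the check if enough wall-clock time has passed
        if time.time() - last_check > periodic_check:
            check()
            last_check = time.time()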