def test_touch_replicas(self):
    """ REPLICA (CORE): Touch replicas accessed_at timestamp

    Registers two batches of replicas on the MOCK RSE (the second batch also
    contains the first file of the first batch), touches exactly one replica
    and checks that only that replica and its DID report the new access time.
    """
    tmp_scope = 'mock'
    nbfiles = 5

    def new_files():
        # One fresh, uniquely named file description per requested file.
        return [{'scope': tmp_scope,
                 'name': 'file_%s' % generate_uuid(),
                 'bytes': 1,
                 'adler32': '0cc737eb',
                 'meta': {'events': 10}} for _ in range(nbfiles)]

    def replica_atime(entry):
        # Access time of the MOCK replica that belongs to this file entry.
        return get_replica_atime({'scope': entry['scope'],
                                  'name': entry['name'],
                                  'rse': 'MOCK'})

    files1 = new_files()
    files2 = new_files()
    target = files1[0]
    files2.append(target)

    for batch in (files1, files2):
        add_replicas(rse='MOCK', files=batch, account='root',
                     ignore_availability=True)

    # Drop the microseconds so the value can be compared for equality below.
    now = datetime.utcnow()
    now -= timedelta(microseconds=now.microsecond)

    # Nothing has been accessed yet.
    assert_equal(None, replica_atime(target))
    assert_equal(None, get_did_atime(scope=tmp_scope, name=target['name']))

    touch_replica({'scope': target['scope'],
                   'name': target['name'],
                   'rse': 'MOCK',
                   'accessed_at': now})

    # The touched replica and its DID carry the new timestamp ...
    assert_equal(now, replica_atime(target))
    assert_equal(now, get_did_atime(scope=tmp_scope, name=target['name']))

    # ... while the other replicas stay untouched.
    for untouched in files1[1:nbfiles]:
        assert_equal(None, replica_atime(untouched))

    # NOTE(review): files2[nbfiles - 1] is not covered by this range --
    # confirm whether that is intentional.
    for untouched in files2[:nbfiles - 1]:
        assert_equal(None, replica_atime(untouched))
def __update_atime(self):
    """
    Bulk update atime.

    Walks every trace report buffered in ``self.__reports`` and:

    * optionally declares the traced file as suspicious when the report's
      ``stateReason`` matches one of ``self.__bad_files_patterns``;
    * filters the report by event type, user DN and client state;
    * collects one replica dict per (file, RSE) to be passed to
      ``touch_replica``;
    * puts access-time updates for datasets on ``self.__dataset_queue``.

    Afterwards every collected replica is touched. When ``touch_replica``
    returns a falsy value the trace is sent back to ``self.__queue`` so it
    can be retried later.

    Reports that cannot be processed are logged, counted and skipped.

    :returns: None
    """
    replicas = []
    for report in self.__reports:
        # RSEs whose parent datasets have to be updated for *this* report.
        # Reset on every iteration so that a report never inherits the RSE
        # list of the report processed before it.
        rses = []

        if 'vo' not in report:
            report['vo'] = 'def'

        try:
            # Identify suspicious files
            try:
                if self.__bad_files_patterns \
                        and report['eventType'] in ['get_sm', 'get_sm_a', 'get'] \
                        and 'clientState' in report \
                        and report['clientState'] not in ['DONE', 'FOUND_ROOT', 'ALREADY_DONE']:
                    for pattern in self.__bad_files_patterns:
                        if 'stateReason' in report \
                                and report['stateReason'] \
                                and isinstance(report['stateReason'], str) \
                                and pattern.match(report['stateReason']):
                            reason = report['stateReason'][:255]
                            if 'url' not in report or not report['url']:
                                self.__logger(logging.ERROR,
                                              'Missing url in the following trace : ' + str(report))
                            else:
                                try:
                                    surl = report['url']
                                    declare_bad_file_replicas([surl, ],
                                                              reason=reason,
                                                              issuer=InternalAccount('root', vo=report['vo']),
                                                              status=BadFilesStatus.SUSPICIOUS)
                                    self.__logger(logging.INFO,
                                                  'Declare suspicious file %s with reason %s' % (report['url'], reason))
                                except Exception as error:
                                    self.__logger(logging.ERROR,
                                                  'Failed to declare suspicious file' + str(error))
            except Exception as error:
                # A failure while flagging a suspicious file must not prevent
                # the access time update of the same report.
                self.__logger(logging.ERROR,
                              'Problem with bad trace : %s . Error %s' % (str(report), str(error)))

            # check if scope in report. if not skip this one.
            if 'scope' not in report:
                record_counter('daemons.tracer.kronos.missing_scope')
                if report['eventType'] != 'touch':
                    continue
            else:
                record_counter('daemons.tracer.kronos.with_scope')
                report['scope'] = InternalScope(report['scope'], report['vo'])

            # handle all events starting with get* and download and touch events.
            event_type = report['eventType']
            if not (event_type.startswith(('get', 'sm_get'))
                    or event_type == 'download'
                    or event_type == 'touch'):
                continue
            if event_type.endswith('_es'):
                continue

            record_counter('daemons.tracer.kronos.total_get')
            if event_type == 'get':
                record_counter('daemons.tracer.kronos.dq2clients')
            elif event_type == 'get_sm' or event_type == 'sm_get':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_production_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_production')
            elif event_type == 'get_sm_a' or event_type == 'sm_get_a':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_analysis_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_analysis')
            elif event_type == 'download':
                record_counter('daemons.tracer.kronos.rucio_download')
            elif event_type == 'touch':
                record_counter('daemons.tracer.kronos.rucio_touch')
            else:
                record_counter('daemons.tracer.kronos.other_get')

            if event_type == 'download' or event_type == 'touch':
                report['usrdn'] = report['account']

            if report['usrdn'] in self.__excluded_usrdns:
                continue

            # handle touch and non-touch traces differently
            if event_type != 'touch':
                # check if the report has the right state.
                if 'eventVersion' in report:
                    if report['eventVersion'] != 'aCT':
                        if report['clientState'] in self.__excluded_states:
                            continue

                if 'remoteSite' not in report:
                    continue
                if not report['remoteSite']:
                    continue

                if 'filename' not in report:
                    if 'name' in report:
                        report['filename'] = report['name']

                rses = report['remoteSite'].strip().split(',')
                for rse in rses:
                    try:
                        rse_id = get_rse_id(rse=rse, vo=report['vo'])
                    except RSENotFound:
                        self.__logger(logging.WARNING,
                                      "Cannot lookup rse_id for %s. Will skip this report.", rse)
                        record_counter('daemons.tracer.kronos.rse_not_found')
                        continue
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': rse,
                        'rse_id': rse_id,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        'eventVersion': report['eventVersion']
                    })
            else:
                # if touch event and if datasetScope is in the report then it means
                # that there is no file scope/name and therefore only the dataset is
                # put in the queue to be updated and the rest is skipped.
                rse_id = None
                rse = None
                if 'remoteSite' in report:
                    rse = report['remoteSite']
                    try:
                        rse_id = get_rse_id(rse=rse, vo=report['vo'])
                    except RSENotFound:
                        self.__logger(logging.WARNING, "Cannot lookup rse_id for %s.", rse)
                        record_counter('daemons.tracer.kronos.rse_not_found')

                if 'datasetScope' in report:
                    self.__dataset_queue.put({
                        'scope': InternalScope(report['datasetScope'], vo=report['vo']),
                        'name': report['dataset'],
                        'rse_id': rse_id,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                    })
                    continue
                else:
                    if 'remoteSite' not in report:
                        continue
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': rse,
                        'rse_id': rse_id,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        # The resubmission code below reads these two keys
                        # from every replica; without them a touch replica
                        # hitting a locked row raised KeyError and aborted
                        # the remaining updates of the batch.
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        'eventVersion': report.get('eventVersion')
                    })
                    # Parent datasets of a touched file are updated for the
                    # RSE named in this trace, not for the RSEs of whatever
                    # report happened to be processed before it.
                    rses = [rse]
        except (KeyError, AttributeError):
            self.__logger(logging.ERROR, "Cannot handle report.", exc_info=True)
            record_counter('daemons.tracer.kronos.report_error')
            continue
        except Exception:
            self.__logger(logging.ERROR, "Exception", exc_info=True)
            continue

        # Queue an access time update for every parent dataset of the file.
        for did in list_parent_dids(report['scope'], report['filename']):
            if did['type'] != DIDType.DATASET:
                continue
            # do not update _dis datasets
            if did['scope'].external == 'panda' and '_dis' in did['name']:
                continue
            for rse in rses:
                try:
                    rse_id = get_rse_id(rse=rse, vo=report['vo'])
                except RSENotFound:
                    self.__logger(logging.WARNING,
                                  "Cannot lookup rse_id for %s. Will skip this report.", rse)
                    record_counter('daemons.tracer.kronos.rse_not_found')
                    continue
                self.__dataset_queue.put({
                    'scope': did['scope'],
                    'name': did['name'],
                    'did_type': did['type'],
                    'rse_id': rse_id,
                    'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                })

    if not replicas:
        return

    self.__logger(logging.DEBUG, "trying to update replicas: %s", replicas)

    try:
        start_time = time()
        for replica in replicas:
            # if touch replica hits a locked row put the trace back into queue for later retry
            if not touch_replica(replica):
                resubmit = {
                    'filename': replica['name'],
                    'scope': replica['scope'].external,
                    'remoteSite': replica['rse'],
                    'traceTimeentryUnix': replica['traceTimeentryUnix'],
                    'eventType': 'get',
                    'usrdn': 'someuser',
                    'clientState': 'DONE',
                    'eventVersion': replica['eventVersion']
                }
                # The report loop above defaults a missing 'vo' to 'def', so
                # the key is only sent when it differs from that default.
                if replica['scope'].vo != 'def':
                    resubmit['vo'] = replica['scope'].vo
                self.__conn.send(body=jdumps(resubmit),
                                 destination=self.__queue,
                                 headers={'appversion': 'rucio', 'resubmitted': '1'})
                record_counter('daemons.tracer.kronos.sent_resubmitted')
                self.__logger(logging.WARNING, 'hit locked row, resubmitted to queue')
        record_timer('daemons.tracer.kronos.update_atime', (time() - start_time) * 1000)
    except Exception:
        self.__logger(logging.ERROR, "Cannot update replicas.", exc_info=True)
        record_counter('daemons.tracer.kronos.update_error')

    self.__logger(logging.INFO, 'updated %d replica(s)' % len(replicas))
def __update_atime(self):
    """
    Bulk update atime.

    Walks every trace report buffered in ``self.__reports``, filters it by
    event type, user DN and client state, collects one replica dict per
    (file, RSE) and puts access-time updates for datasets on
    ``self.__dataset_queue``.

    Afterwards every collected replica is passed to ``touch_replica``. When
    that call returns a falsy value the trace is sent back to
    ``self.__queue`` so it can be retried later.

    Reports raising ``KeyError`` or ``AttributeError`` are logged, counted
    and skipped.

    :returns: None
    """
    replicas = []
    for report in self.__reports:
        # RSEs whose parent datasets have to be updated for *this* report.
        # Reset on every iteration so that a report never inherits the RSE
        # list of the report processed before it.
        rses = []

        try:
            # check if scope in report. if not skip this one.
            if 'scope' not in report:
                record_counter('daemons.tracer.kronos.missing_scope')
                if report['eventType'] != 'touch':
                    continue
            else:
                record_counter('daemons.tracer.kronos.with_scope')

            # handle all events starting with get* and download and touch events.
            event_type = report['eventType']
            if not (event_type.startswith(('get', 'sm_get'))
                    or event_type == 'download'
                    or event_type == 'touch'):
                continue
            if event_type.endswith('_es'):
                continue

            record_counter('daemons.tracer.kronos.total_get')
            if event_type == 'get':
                record_counter('daemons.tracer.kronos.dq2clients')
            elif event_type == 'get_sm' or event_type == 'sm_get':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_production_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_production')
            elif event_type == 'get_sm_a' or event_type == 'sm_get_a':
                if report['eventVersion'] == 'aCT':
                    record_counter('daemons.tracer.kronos.panda_analysis_act')
                else:
                    record_counter('daemons.tracer.kronos.panda_analysis')
            elif event_type == 'download':
                record_counter('daemons.tracer.kronos.rucio_download')
            elif event_type == 'touch':
                record_counter('daemons.tracer.kronos.rucio_touch')
            else:
                record_counter('daemons.tracer.kronos.other_get')

            if event_type == 'download' or event_type == 'touch':
                report['usrdn'] = report['account']

            if report['usrdn'] in self.__excluded_usrdns:
                continue

            # handle touch and non-touch traces differently
            if event_type != 'touch':
                # check if the report has the right state.
                if 'eventVersion' in report:
                    if report['eventVersion'] != 'aCT':
                        if report['clientState'] in self.__excluded_states:
                            continue

                if 'remoteSite' not in report:
                    continue
                if not report['remoteSite']:
                    continue

                if 'filename' not in report:
                    if 'name' in report:
                        report['filename'] = report['name']

                rses = report['remoteSite'].strip().split(',')
                for rse in rses:
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': rse,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        'eventVersion': report['eventVersion']
                    })
            else:
                # if touch event and if datasetScope is in the report then it means
                # that there is no file scope/name and therefore only the dataset is
                # put in the queue to be updated and the rest is skipped.
                if 'datasetScope' in report:
                    rse = None
                    if 'remoteSite' in report:
                        rse = report['remoteSite']
                    self.__dataset_queue.put({
                        'scope': report['datasetScope'],
                        'name': report['dataset'],
                        'rse': rse,
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                    })
                    continue
                else:
                    if 'remoteSite' not in report:
                        continue
                    replicas.append({
                        'name': report['filename'],
                        'scope': report['scope'],
                        'rse': report['remoteSite'],
                        'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix']),
                        # The resubmission code below reads these two keys
                        # from every replica; without them a touch replica
                        # hitting a locked row raised KeyError and aborted
                        # the remaining updates of the batch.
                        'traceTimeentryUnix': report['traceTimeentryUnix'],
                        'eventVersion': report.get('eventVersion')
                    })
                    # Parent datasets of a touched file are updated for the
                    # RSE named in this trace, not for the RSEs of whatever
                    # report happened to be processed before it.
                    rses = [report['remoteSite']]
        except (KeyError, AttributeError):
            logging.error(format_exc())
            record_counter('daemons.tracer.kronos.report_error')
            continue

        # Queue an access time update for every parent dataset of the file.
        for did in list_parent_dids(report['scope'], report['filename']):
            if did['type'] != DIDType.DATASET:
                continue
            # do not update _dis datasets
            if did['scope'] == 'panda' and '_dis' in did['name']:
                continue
            for rse in rses:
                self.__dataset_queue.put({
                    'scope': did['scope'],
                    'name': did['name'],
                    'did_type': did['type'],
                    'rse': rse,
                    'accessed_at': datetime.utcfromtimestamp(report['traceTimeentryUnix'])
                })

    logging.debug(replicas)

    try:
        ts = time()
        for replica in replicas:
            # if touch replica hits a locked row put the trace back into queue for later retry
            if not touch_replica(replica):
                resubmit = {
                    'filename': replica['name'],
                    'scope': replica['scope'],
                    'remoteSite': replica['rse'],
                    'traceTimeentryUnix': replica['traceTimeentryUnix'],
                    'eventType': 'get',
                    'usrdn': 'someuser',
                    'clientState': 'DONE',
                    'eventVersion': replica['eventVersion']
                }
                self.__conn.send(body=jdumps(resubmit),
                                 destination=self.__queue,
                                 headers={'appversion': 'rucio', 'resubmitted': '1'})
                record_counter('daemons.tracer.kronos.sent_resubmitted')
                logging.warning('(kronos_file) hit locked row, resubmitted to queue')
        record_timer('daemons.tracer.kronos.update_atime', (time() - ts) * 1000)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and
        # SystemExit must still be able to stop the daemon.
        logging.error(format_exc())
        record_counter('daemons.tracer.kronos.update_error')

    logging.info('(kronos_file) updated %d replicas' % len(replicas))
None, get_replica_atime({ 'scope': files1[0]['scope'], 'name': files1[0]['name'], 'rse': 'MOCK' })) assert_equal(None, get_did_atime(scope=tmp_scope, name=files1[0]['name'])) for r in [{ 'scope': files1[0]['scope'], 'name': files1[0]['name'], 'rse': 'MOCK', 'accessed_at': now }]: touch_replica(r) assert_equal( now, get_replica_atime({ 'scope': files1[0]['scope'], 'name': files1[0]['name'], 'rse': 'MOCK' })) assert_equal(now, get_did_atime(scope=tmp_scope, name=files1[0]['name'])) for i in range(1, nbfiles): assert_equal( None, get_replica_atime({