def test_clean_and_get_collection_replica_updates(self):
    """ REPLICA (CORE): Get cleaned update requests for collection replicas. """
    name_with_replica = 'dataset_with_rse%s' % generate_uuid()
    name_without_replica = 'dataset_without_rse%s' % generate_uuid()
    new_dids = [
        {'name': name_without_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET},
        {'name': name_with_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET},
    ]
    add_dids(dids=new_dids, account=self.account, session=self.db_session)

    # Start from an empty update queue so that only the rows created below are counted.
    self.db_session.query(models.UpdatedCollectionReplica).delete()  # pylint: disable=no-member
    self.db_session.commit()  # pylint: disable=no-member

    # setup test data - 4 without corresponding replica, 4 duplicates and 2 correct
    models.CollectionReplica(rse_id=self.rse_id,
                             scope=self.scope,
                             bytes=10,
                             length=0,
                             available_replicas_cnt=0,
                             state=constants.ReplicaState.AVAILABLE,
                             name=name_with_replica,
                             did_type=constants.DIDType.DATASET).save(session=self.db_session)

    # (extra constructor arguments, dataset name), listed in insertion order.
    with_rse = {'rse_id': self.rse_id}
    without_rse = {}
    update_rows = (
        [(with_rse, name_with_replica)] * 3
        + [(with_rse, name_without_replica)] * 2
        + [(without_rse, name_without_replica)] * 2
        + [(without_rse, name_with_replica)] * 3
    )
    for extra_fields, dataset_name in update_rows:
        models.UpdatedCollectionReplica(scope=self.scope,
                                        name=dataset_name,
                                        did_type=constants.DIDType.DATASET,
                                        **extra_fields).save(session=self.db_session)

    cleaned_updates = get_cleaned_updated_collection_replicas(total_workers=0, worker_number=0, session=self.db_session)
    assert len(cleaned_updates) == 2

    expected_names = (name_with_replica, name_without_replica)
    for cleaned_update in cleaned_updates:
        stored_row = self.db_session.query(models.UpdatedCollectionReplica).filter_by(id=cleaned_update['id']).one()  # pylint: disable=no-member
        assert stored_row.scope == self.scope
        assert stored_row.name in expected_names
def test_abacus_collection_replica_cleanup(self):
    """ ABACUS (COLLECTION REPLICA): Test if the cleanup procedure works correctly. """
    # Run the daemon once before creating any test data.
    collection_replica.run(once=True)
    db_session = session.get_session()

    # Two fresh RSEs; each name is generated right before its RSE is added.
    rse_id1, rse_id2 = [add_rse(rse_name_generator(), **self.vo) for _ in range(2)]

    scope = InternalScope('mock', **self.vo)
    dataset = 'dataset_%s' % generate_uuid()
    jdoe = InternalAccount('jdoe', **self.vo)
    add_did(scope, dataset, DIDType.DATASET, jdoe)

    # One collection replica per RSE.
    for replica_rse_id in (rse_id1, rse_id2):
        models.CollectionReplica(scope=scope,
                                 name=dataset,
                                 rse_id=replica_rse_id,
                                 state=ReplicaState.AVAILABLE,
                                 bytes=1).save(session=db_session, flush=False)

    # Two update requests per RSE plus one request without an RSE.
    for update_rse_id in (rse_id1, rse_id1, rse_id2, rse_id2, None):
        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=update_rse_id,
                                        did_type=DIDType.DATASET).save(session=db_session, flush=False)
    db_session.commit()

    cleaned_updates = get_cleaned_updated_collection_replicas(1, 1)
    assert len(cleaned_updates) == 3
    self.did_client.set_metadata(scope.external, dataset, 'lifetime', -1)
def successful_transfer(scope, name, rse_id, nowait, session=None):
    """
    Update the state of all replica locks because of a successful transfer.

    For every lock on the given replica that is not yet OK, the lock is set to
    OK, the counters of the owning rule are adjusted, an UpdatedCollectionReplica
    row is queued for the affected dataset(s) and a rule history entry is written.
    A rule in state REPLICATING that has no replicating and no stuck locks left
    is switched to OK as well.

    :param scope:    Scope of the did
    :param name:     Name of the did
    :param rse_id:   RSE id
    :param nowait:   Nowait parameter for the for_update queries.
    :param session:  DB Session. It is used unconditionally, so it must not be None when the body runs.
                     NOTE(review): presumably injected by a session decorator outside this view - confirm.
    """

    locks = session.query(models.ReplicaLock).with_for_update(nowait=nowait).filter_by(scope=scope, name=name, rse_id=rse_id)
    for lock in locks:
        if lock.state == LockState.OK:
            # Nothing to do, this lock is already satisfied.
            continue
        # Logger arguments are passed lazily so the message is only built when DEBUG is enabled.
        logging.debug('Marking lock %s:%s for rule %s on rse %s as OK', lock.scope, lock.name, lock.rule_id, lock.rse_id)

        # Update the rule counters
        rule = session.query(models.ReplicationRule).with_for_update(nowait=nowait).filter_by(id=lock.rule_id).one()
        logging.debug('Updating rule counters for rule %s [%d/%d/%d]',
                      rule.id, rule.locks_ok_cnt, rule.locks_replicating_cnt, rule.locks_stuck_cnt)

        if lock.state == LockState.REPLICATING:
            rule.locks_replicating_cnt -= 1
        elif lock.state == LockState.STUCK:
            rule.locks_stuck_cnt -= 1
        rule.locks_ok_cnt += 1
        lock.state = LockState.OK

        logging.debug('Finished updating rule counters for rule %s [%d/%d/%d]',
                      rule.id, rule.locks_ok_cnt, rule.locks_replicating_cnt, rule.locks_stuck_cnt)

        # Insert UpdatedCollectionReplica
        if rule.did_type == DIDType.DATASET:
            models.UpdatedCollectionReplica(scope=rule.scope,
                                            name=rule.name,
                                            did_type=rule.did_type,
                                            rse_id=rse_id).save(flush=False, session=session)
        elif rule.did_type == DIDType.CONTAINER:
            # Resolve to all child datasets
            for dataset in rucio.core.did.list_child_datasets(scope=rule.scope, name=rule.name, session=session):
                models.UpdatedCollectionReplica(scope=dataset['scope'],
                                                name=dataset['name'],
                                                did_type=dataset['type'],
                                                rse_id=rse_id).save(flush=False, session=session)

        # Update the rule state
        if rule.state == RuleState.SUSPENDED:
            # The state of a suspended rule is left untouched.
            pass
        elif rule.locks_stuck_cnt > 0:
            # While locks are still stuck the rule state is not changed here.
            pass
        elif rule.locks_replicating_cnt == 0 and rule.state == RuleState.REPLICATING:
            rule.state = RuleState.OK
            # Try to update the DatasetLocks
            if rule.grouping != RuleGrouping.NONE:
                ds_locks = session.query(models.DatasetLock).with_for_update(nowait=nowait).filter_by(rule_id=rule.id)
                for ds_lock in ds_locks:
                    ds_lock.state = LockState.OK
                session.flush()
            rucio.core.rule.generate_message_for_dataset_ok_callback(rule=rule, session=session)
            if rule.notification == RuleNotification.YES:
                rucio.core.rule.generate_email_for_rule_ok_notification(rule=rule, session=session)
            # Try to release potential parent rules
            rucio.core.rule.release_parent_rule(child_rule_id=rule.id, session=session)

        # Insert rule history
        rucio.core.rule.insert_rule_history(rule=rule, recent=True, longterm=False, session=session)
        session.flush()
def test_update_collection_replica(self):
    """ REPLICA (CORE): Update collection replicas from update requests. """
    file_size = 2
    files = [{'name': 'file_%s' % generate_uuid(), 'scope': self.scope, 'bytes': file_size} for _ in range(2)]
    n_files = len(files)
    dataset_name = 'dataset_test_%s' % generate_uuid()

    def queue_update_request(**fields):
        """ Store a new update request for the test dataset. """
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name, **fields).save(session=self.db_session)

    def apply_update_request(**filters):
        """ Fetch the single update request matching the filters, apply it and return the fetched row. """
        request = self.db_session.query(models.UpdatedCollectionReplica).filter_by(  # pylint: disable=no-member
            scope=self.scope, name=dataset_name, **filters).one()
        update_collection_replica(update_request=request.to_dict(), session=self.db_session)
        return request

    def apply_update_request_without_rse():
        """ Fetch the single update request whose rse_id is NULL and apply it. """
        request = self.db_session.query(models.UpdatedCollectionReplica).filter(  # pylint: disable=no-member
            models.UpdatedCollectionReplica.scope == self.scope,
            models.UpdatedCollectionReplica.name == dataset_name,
            models.UpdatedCollectionReplica.rse_id.is_(None)).one()
        update_collection_replica(update_request=request.to_dict(), session=self.db_session)

    def assert_replica(replica, total, available, state):
        """ Check size, length, availability counters and state of a dataset replica. """
        assert replica['bytes'] == total * file_size
        assert replica['length'] == total
        assert replica['available_bytes'] == available * file_size
        assert replica['available_replicas_cnt'] == available
        assert replica['state'] == state

    def check_replica(total, available, state, **filters):
        """ Fetch the single dataset replica matching the filters and check it. """
        replica = self.db_session.query(models.CollectionReplica).filter_by(  # pylint: disable=no-member
            scope=self.scope, name=dataset_name, **filters).one()
        assert_replica(replica, total, available, state)

    add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session)
    add_did(scope=self.scope, name=dataset_name, type=constants.DIDType.DATASET, account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=files, account=self.account, session=self.db_session)
    models.CollectionReplica(rse_id=self.rse_id,
                             scope=self.scope,
                             state=constants.ReplicaState.AVAILABLE,
                             name=dataset_name,
                             did_type=constants.DIDType.DATASET,
                             bytes=n_files * file_size,
                             length=n_files,
                             available_replicas_cnt=0).save(session=self.db_session)

    # Update request with rse id
    # First update -> dataset replica should be available
    queue_update_request(rse_id=self.rse_id, did_type=constants.DIDType.DATASET)
    handled_request = apply_update_request(rse_id=self.rse_id)
    # The handled request must no longer be stored.
    leftover_request = self.db_session.query(models.UpdatedCollectionReplica).filter_by(id=handled_request.id).first()  # pylint: disable=no-member
    assert leftover_request is None
    check_replica(n_files, n_files, ReplicaState.AVAILABLE)

    # Delete one file replica -> dataset replica should be unavailable
    # (no request is saved here; the test relies on one existing after delete_replicas)
    delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session)
    apply_update_request(rse_id=self.rse_id)
    check_replica(n_files, n_files - 1, ReplicaState.UNAVAILABLE)

    # Add one file replica -> dataset replica should be available again
    add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]], account=self.account, session=self.db_session)
    queue_update_request(rse_id=self.rse_id, did_type=constants.DIDType.DATASET)
    apply_update_request(rse_id=self.rse_id)
    check_replica(n_files, n_files, ReplicaState.AVAILABLE)

    # Delete all file replicas -> dataset replica should be deleted
    delete_replicas(rse_id=self.rse_id, files=files, session=self.db_session)
    apply_update_request(rse_id=self.rse_id)
    remaining_replicas = self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).all()  # pylint: disable=no-member
    assert len(remaining_replicas) == 0

    # Update request without rse_id - using two replicas per file -> total 4 replicas
    add_replicas(rse_id=self.rse_id, files=files, account=self.account, session=self.db_session)
    add_replicas(rse_id=self.rse2_id, files=files, account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=files, account=self.account, session=self.db_session)
    for replica_rse_id in (self.rse_id, self.rse2_id):
        models.CollectionReplica(rse_id=replica_rse_id,
                                 scope=self.scope,
                                 name=dataset_name,
                                 state=constants.ReplicaState.UNAVAILABLE,
                                 did_type=constants.DIDType.DATASET,
                                 bytes=n_files * file_size,
                                 length=n_files).save(session=self.db_session)

    # First update -> replicas should be available
    queue_update_request()
    apply_update_request()
    for dataset_replica in self.db_session.query(models.CollectionReplica).filter_by(scope=self.scope, name=dataset_name).all():  # pylint: disable=no-member
        assert_replica(dataset_replica, n_files, n_files, ReplicaState.AVAILABLE)

    # Delete first replica on first RSE -> replica on first RSE should be unavailable, replica on second RSE should be still available
    delete_replicas(rse_id=self.rse_id, files=[files[0]], session=self.db_session)
    queue_update_request(did_type=constants.DIDType.DATASET)
    # delete_replica creates also update object but with rse_id -> extra filter for rse_id is NULL
    apply_update_request_without_rse()
    check_replica(n_files, n_files - 1, ReplicaState.UNAVAILABLE, rse_id=self.rse_id)
    check_replica(n_files, n_files, ReplicaState.AVAILABLE, rse_id=self.rse2_id)

    # Set the state of the first replica on the second RSE to UNAVAILABLE -> both replicas should be unavailable
    file_replica = self.db_session.query(models.RSEFileAssociation).filter_by(  # pylint: disable=no-member
        rse_id=self.rse2_id, scope=self.scope, name=files[0]['name']).one()
    file_replica.state = constants.ReplicaState.UNAVAILABLE
    queue_update_request(did_type=constants.DIDType.DATASET)
    apply_update_request_without_rse()
    check_replica(n_files, n_files - 1, ReplicaState.UNAVAILABLE, rse_id=self.rse_id)
    check_replica(n_files, n_files - 1, ReplicaState.UNAVAILABLE, rse_id=self.rse2_id)

    # Delete first replica on second RSE -> file is no longer part of dataset -> both replicas should be available
    delete_replicas(rse_id=self.rse2_id, files=[files[0]], session=self.db_session)
    queue_update_request(did_type=constants.DIDType.DATASET)
    apply_update_request_without_rse()
    check_replica(n_files - 1, n_files - 1, ReplicaState.AVAILABLE, rse_id=self.rse_id)
    check_replica(n_files - 1, n_files - 1, ReplicaState.AVAILABLE, rse_id=self.rse2_id)

    # Add first replica to the first RSE -> first replicas should be available
    add_replicas(rse_id=self.rse_id, files=[files[0]], account=self.account, session=self.db_session)
    attach_dids(scope=self.scope, name=dataset_name, dids=[files[0]], account=self.account, session=self.db_session)
    queue_update_request(did_type=constants.DIDType.DATASET)
    apply_update_request_without_rse()
    check_replica(n_files, n_files, ReplicaState.AVAILABLE, rse_id=self.rse_id)
    check_replica(n_files, n_files - 1, ReplicaState.UNAVAILABLE, rse_id=self.rse2_id)

    # Add first replica to the second RSE -> both replicas should be available again
    add_replicas(rse_id=self.rse2_id, files=[files[0]], account=self.account, session=self.db_session)
    queue_update_request(did_type=constants.DIDType.DATASET)
    apply_update_request_without_rse()
    check_replica(n_files, n_files, ReplicaState.AVAILABLE, rse_id=self.rse_id)
    check_replica(n_files, n_files, ReplicaState.AVAILABLE, rse_id=self.rse2_id)