def test_abacus_collection_replica_cleanup(self):
        """ ABACUS (COLLECTION REPLICA): Test if the cleanup procedure works correctly. """
        collection_replica.run(once=True)
        db_session = session.get_session()
        rse1 = rse_name_generator()
        rse_id1 = add_rse(rse1, **self.vo)
        rse2 = rse_name_generator()
        rse_id2 = add_rse(rse2, **self.vo)

        scope = InternalScope('mock', **self.vo)
        dataset = 'dataset_%s' % generate_uuid()
        jdoe = InternalAccount('jdoe', **self.vo)
        add_did(scope, dataset, DIDType.DATASET, jdoe)

        models.CollectionReplica(scope=scope,
                                 name=dataset,
                                 rse_id=rse_id1,
                                 state=ReplicaState.AVAILABLE,
                                 bytes=1).save(session=db_session, flush=False)
        models.CollectionReplica(scope=scope,
                                 name=dataset,
                                 rse_id=rse_id2,
                                 state=ReplicaState.AVAILABLE,
                                 bytes=1).save(session=db_session, flush=False)

        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=rse_id1,
                                        did_type=DIDType.DATASET).save(
                                            session=db_session, flush=False)
        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=rse_id1,
                                        did_type=DIDType.DATASET).save(
                                            session=db_session, flush=False)
        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=rse_id2,
                                        did_type=DIDType.DATASET).save(
                                            session=db_session, flush=False)
        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=rse_id2,
                                        did_type=DIDType.DATASET).save(
                                            session=db_session, flush=False)
        models.UpdatedCollectionReplica(scope=scope,
                                        name=dataset,
                                        rse_id=None,
                                        did_type=DIDType.DATASET).save(
                                            session=db_session, flush=False)
        db_session.commit()
        assert len(get_cleaned_updated_collection_replicas(1, 1)) == 3
        self.did_client.set_metadata(scope.external, dataset, 'lifetime', -1)
Exemplo n.º 2
0
    def test_clean_and_get_collection_replica_updates(self):
        """ REPLICA (CORE): Get cleaned update requests for collection replicas. """
        dataset_name_with_collection_replica = 'dataset_with_rse%s' % generate_uuid()
        dataset_name_without_collection_replica = 'dataset_without_rse%s' % generate_uuid()
        add_dids(dids=[{'name': dataset_name_without_collection_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET},
                       {'name': dataset_name_with_collection_replica, 'scope': self.scope, 'type': constants.DIDType.DATASET}], account=self.account, session=self.db_session)
        self.db_session.query(models.UpdatedCollectionReplica).delete()  # pylint: disable=no-member
        self.db_session.commit()  # pylint: disable=no-member

        # setup test data - 4 without corresponding replica, 4 duplicates and 2 correct
        models.CollectionReplica(rse_id=self.rse_id, scope=self.scope, bytes=10, length=0, available_replicas_cnt=0, state=constants.ReplicaState.AVAILABLE, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET)\
            .save(session=self.db_session)
        models.UpdatedCollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name_without_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(rse_id=self.rse_id, scope=self.scope, name=dataset_name_without_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name_without_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name_without_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)
        models.UpdatedCollectionReplica(scope=self.scope, name=dataset_name_with_collection_replica, did_type=constants.DIDType.DATASET).save(session=self.db_session)

        cleaned_collection_replica_updates = get_cleaned_updated_collection_replicas(total_workers=0, worker_number=0, session=self.db_session)
        assert len(cleaned_collection_replica_updates) == 2
        for update_request in cleaned_collection_replica_updates:
            update_request = self.db_session.query(models.UpdatedCollectionReplica).filter_by(id=update_request['id']).one()  # pylint: disable=no-member
            assert update_request.scope == self.scope
            assert update_request.name in (dataset_name_with_collection_replica, dataset_name_without_collection_replica)
Exemplo n.º 3
0
    def test_update_collection_replica(self):
        """ REPLICA (CORE): Update collection replicas from update requests. """
        file_size = 2
        files = [{
            'name': 'file_%s' % generate_uuid(),
            'scope': self.scope,
            'bytes': file_size
        } for i in range(0, 2)]
        dataset_name = 'dataset_test_%s' % generate_uuid()
        add_replicas(rse_id=self.rse_id,
                     files=files,
                     account=self.account,
                     session=self.db_session)
        add_did(scope=self.scope,
                name=dataset_name,
                type=constants.DIDType.DATASET,
                account=self.account,
                session=self.db_session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=files,
                    account=self.account,
                    session=self.db_session)
        models.CollectionReplica(rse_id=self.rse_id, scope=self.scope, state=constants.ReplicaState.AVAILABLE, name=dataset_name, did_type=constants.DIDType.DATASET, bytes=len(files) * file_size, length=len(files), available_replicas_cnt=0)\
              .save(session=self.db_session)

        # Update request with rse id
        # First update -> dataset replica should be available
        models.UpdatedCollectionReplica(
            rse_id=self.rse_id,
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                rse_id=self.rse_id, scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                id=update_request.id).first()  # pylint: disable=no-member
        assert update_request is None
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE

        # Delete one file replica -> dataset replica should be unavailable
        delete_replicas(rse_id=self.rse_id,
                        files=[files[0]],
                        session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                rse_id=self.rse_id, scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

        # Add one file replica -> dataset replica should be available again
        add_replicas(rse_id=self.rse_id,
                     files=[files[0]],
                     account=self.account,
                     session=self.db_session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=[files[0]],
                    account=self.account,
                    session=self.db_session)
        models.UpdatedCollectionReplica(
            rse_id=self.rse_id,
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                rse_id=self.rse_id, scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE

        # Delete all file replicas -> dataset replica should be deleted
        delete_replicas(rse_id=self.rse_id,
                        files=files,
                        session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                rse_id=self.rse_id, scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name).all()  # pylint: disable=no-member
        assert len(dataset_replica) == 0

        # Update request without rse_id - using two replicas per file -> total 4 replicas
        add_replicas(rse_id=self.rse_id,
                     files=files,
                     account=self.account,
                     session=self.db_session)
        add_replicas(rse_id=self.rse2_id,
                     files=files,
                     account=self.account,
                     session=self.db_session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=files,
                    account=self.account,
                    session=self.db_session)
        models.CollectionReplica(
            rse_id=self.rse_id,
            scope=self.scope,
            name=dataset_name,
            state=constants.ReplicaState.UNAVAILABLE,
            did_type=constants.DIDType.DATASET,
            bytes=len(files) * file_size,
            length=len(files)).save(session=self.db_session)
        models.CollectionReplica(
            rse_id=self.rse2_id,
            scope=self.scope,
            name=dataset_name,
            state=constants.ReplicaState.UNAVAILABLE,
            did_type=constants.DIDType.DATASET,
            bytes=len(files) * file_size,
            length=len(files)).save(session=self.db_session)

        # First update -> replicas should be available
        models.UpdatedCollectionReplica(
            scope=self.scope, name=dataset_name).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter_by(
                scope=self.scope, name=dataset_name).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        for dataset_replica in self.db_session.query(
                models.CollectionReplica).filter_by(scope=self.scope,
                                                    name=dataset_name).all():  # pylint: disable=no-member
            assert dataset_replica['bytes'] == len(files) * file_size
            assert dataset_replica['length'] == len(files)
            assert dataset_replica['available_bytes'] == len(files) * file_size
            assert dataset_replica['available_replicas_cnt'] == len(files)
            assert dataset_replica['state'] == ReplicaState.AVAILABLE

        # Delete first replica on first RSE -> replica on first RSE should be unavailable, replica on second RSE should be still available
        delete_replicas(rse_id=self.rse_id,
                        files=[files[0]],
                        session=self.db_session)
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        # delete_replica creates also update object but with rse_id -> extra filter for rse_id is NULL
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter(
                models.UpdatedCollectionReplica.scope == self.scope,
                models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.UNAVAILABLE
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse2_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE

        # Set the state of the first replica on the second RSE to UNAVAILABLE -> both replicass should be unavailable
        file_replica = self.db_session.query(
            models.RSEFileAssociation).filter_by(rse_id=self.rse2_id,
                                                 scope=self.scope,
                                                 name=files[0]['name']).one()  # pylint: disable=no-member
        file_replica.state = constants.ReplicaState.UNAVAILABLE
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter(
                models.UpdatedCollectionReplica.scope == self.scope,
                models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.UNAVAILABLE
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse2_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

        # Delete first replica on second RSE -> file is not longer part of dataset -> both replicas should be available
        delete_replicas(rse_id=self.rse2_id,
                        files=[files[0]],
                        session=self.db_session)
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter(
                models.UpdatedCollectionReplica.scope == self.scope,
                models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == (len(files) - 1) * file_size
        assert dataset_replica['length'] == len(files) - 1
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.AVAILABLE
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse2_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == (len(files) - 1) * file_size
        assert dataset_replica['length'] == len(files) - 1
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.AVAILABLE

        # Add first replica to the first RSE -> first replicas should be available
        add_replicas(rse_id=self.rse_id,
                     files=[files[0]],
                     account=self.account,
                     session=self.db_session)
        attach_dids(scope=self.scope,
                    name=dataset_name,
                    dids=[files[0]],
                    account=self.account,
                    session=self.db_session)
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter(
                models.UpdatedCollectionReplica.scope == self.scope,
                models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse2_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == (len(files) -
                                                      1) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files) - 1
        assert dataset_replica['state'] == ReplicaState.UNAVAILABLE

        # Add first replica to the second RSE -> both replicas should be available again
        add_replicas(rse_id=self.rse2_id,
                     files=[files[0]],
                     account=self.account,
                     session=self.db_session)
        models.UpdatedCollectionReplica(
            scope=self.scope,
            name=dataset_name,
            did_type=constants.DIDType.DATASET).save(session=self.db_session)
        update_request = self.db_session.query(
            models.UpdatedCollectionReplica).filter(
                models.UpdatedCollectionReplica.scope == self.scope,
                models.UpdatedCollectionReplica.name == dataset_name,  # pylint: disable=no-member
                models.UpdatedCollectionReplica.rse_id.is_(None)).one()  # pylint: disable=no-member
        update_collection_replica(update_request=update_request.to_dict(),
                                  session=self.db_session)
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE
        dataset_replica = self.db_session.query(
            models.CollectionReplica).filter_by(scope=self.scope,
                                                name=dataset_name,
                                                rse_id=self.rse2_id).one()  # pylint: disable=no-member
        assert dataset_replica['bytes'] == len(files) * file_size
        assert dataset_replica['length'] == len(files)
        assert dataset_replica['available_bytes'] == len(files) * file_size
        assert dataset_replica['available_replicas_cnt'] == len(files)
        assert dataset_replica['state'] == ReplicaState.AVAILABLE