Exemplo n.º 1
0
def test_dereference_missing_upload(api, users, location, task_delay):
    with api.test_request_context():
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )

        record: SWORDDeposit = SWORDDeposit.create({})
        record.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=segmented_upload_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            lambda *args: True,
            "http://localhost/",
        )

        object_version = ObjectVersion.query.one()

        with pytest.raises(ValueError):
            tasks.dereference_object(record.id, object_version.version_id)

        assert TagManager(object_version)[ObjectTagKey.FileState] == FileState.Error
def test_multipart_last_part(app, db, bucket):
    """Test multipart creation."""
    mp = MultipartObject.create(bucket, 'test.txt', 100, 20)
    assert mp.last_part_size == 0
    assert mp.last_part_number == 4
    mp = MultipartObject.create(bucket, 'test.txt', 101, 20)
    assert mp.last_part_size == 1
    assert mp.last_part_number == 5
def test_multipart_last_part(app, db, bucket):
    """Test multipart creation."""
    mp = MultipartObject.create(bucket, 'test.txt', 100, 20)
    assert mp.last_part_size == 0
    assert mp.last_part_number == 4
    mp = MultipartObject.create(bucket, 'test.txt', 101, 20)
    assert mp.last_part_size == 1
    assert mp.last_part_number == 5
Exemplo n.º 4
0
    def delete(self, *args, **kwargs):
        """Delete the deposit."""
        if self['_deposit'].get('pid'):
            raise PIDInvalidAction()

        # Delete reserved recid.
        pid_recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        if pid_recid.status == PIDStatus.RESERVED:
            db.session.delete(pid_recid)

        # Completely remove bucket
        q = RecordsBuckets.query.filter_by(record_id=self.id)
        bucket = q.one().bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            q.delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
            ).delete(synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.remove()

        return super(ZenodoDeposit, self).delete(*args, **kwargs)
Exemplo n.º 5
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        :param delete_published: If True, even deposit of a published record
            will be deleted (usually used by admin operations).
        :type delete_published: bool
        """
        is_published = self['_deposit'].get('pid')
        if is_published and not delete_published:
            raise PIDInvalidAction()

        # Delete the recid
        recid = PersistentIdentifier.get(pid_type='recid',
                                         pid_value=self['recid'])

        versioning = PIDVersioning(child=recid)
        if versioning.exists:
            if versioning.draft_child and \
                    self.pid == versioning.draft_child_deposit:
                versioning.remove_draft_child()
            if versioning.last_child:
                index_siblings(versioning.last_child,
                               children=versioning.children.all(),
                               include_pid=True,
                               neighbors_eager=True,
                               with_deposits=True)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        if 'conceptrecid' in self:
            concept_recid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_recid.status == PIDStatus.RESERVED:
                db.session.delete(concept_recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(
                    mp_q.with_entities(
                        MultipartObject.upload_id).subquery())).delete(
                            synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        depid = kwargs.get('pid', self.pid)
        if depid:
            depid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record since
        # we need to completely override eveything that the Deposit.delete
        # method does.
        return super(Deposit, self).delete(*args, **kwargs)
Exemplo n.º 6
0
    def delete(self, *args, **kwargs):
        """Delete the deposit."""
        if self['_deposit'].get('pid'):
            raise PIDInvalidAction()

        # Delete reserved recid.
        pid_recid = PersistentIdentifier.get(pid_type='recid',
                                             pid_value=self['recid'])

        if pid_recid.status == PIDStatus.RESERVED:
            db.session.delete(pid_recid)

        # Completely remove bucket
        q = RecordsBuckets.query.filter_by(record_id=self.id)
        bucket = q.one().bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            q.delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(
                    mp_q.with_entities(
                        MultipartObject.upload_id).subquery())).delete(
                            synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.remove()

        return super(ZenodoDeposit, self).delete(*args, **kwargs)
Exemplo n.º 7
0
def test_post_by_reference_segmented(api, users, location, task_delay):
    with api.test_request_context(), api.test_client() as client:
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()

        login(client)

        ttl = (
            datetime.datetime.now(tz=datetime.timezone.utc)
            + datetime.timedelta(0, 3600)
        ).isoformat()

        response = client.post(
            "/sword/service-document",
            data=json.dumps(
                {
                    "@context": JSON_LD_CONTEXT,
                    "@type": "ByReference",
                    "byReferenceFiles": [
                        {
                            "@id": f"http://localhost/sword/staging/{segmented_upload_record.id}",
                            "contentDisposition": "attachment; filename=some-resource.json",
                            "contentType": "application/json",
                            "dereference": True,
                            "ttl": ttl,
                        }
                    ],
                }
            ),
            headers={
                "Content-Disposition": "attachment; by-reference=true",
                "Content-Type": "application/ld+json",
            },
        )

        assert response.status_code == HTTPStatus.CREATED

        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)

        assert tags == {
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTTL: ttl,
        }
Exemplo n.º 8
0
    def delete(self, delete_published=False, *args, **kwargs):
        """Delete the deposit.

        :param delete_published: If True, even deposit of a published record
            will be deleted (usually used by admin operations).
        :type delete_published: bool
        """
        is_published = self['_deposit'].get('pid')
        if is_published and not delete_published:
            raise PIDInvalidAction()

        # Delete the recid
        recid = PersistentIdentifier.get(
            pid_type='recid', pid_value=self['recid'])

        versioning = PIDVersioning(child=recid)
        if versioning.exists:
            if versioning.draft_child and \
                    self.pid == versioning.draft_child_deposit:
                versioning.remove_draft_child()
            if versioning.last_child:
                index_siblings(versioning.last_child,
                               children=versioning.children.all(),
                               include_pid=True,
                               neighbors_eager=True,
                               with_deposits=True)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        if 'conceptrecid' in self:
            concept_recid = PersistentIdentifier.get(
                pid_type='recid', pid_value=self['conceptrecid'])
            if concept_recid.status == PIDStatus.RESERVED:
                db.session.delete(concept_recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(mp_q.with_entities(
                    MultipartObject.upload_id).subquery())
            ).delete(synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        depid = kwargs.get('pid', self.pid)
        if depid:
            depid.delete()

        # NOTE: We call the parent of Deposit, invenio_records.api.Record since
        # we need to completely override eveything that the Deposit.delete
        # method does.
        return super(Deposit, self).delete(*args, **kwargs)
def test_multipart_creation(app, db, bucket):
    """Test multipart creation."""
    mp = MultipartObject.create(bucket, 'test.txt', 100, 20)
    db.session.commit()
    assert mp.upload_id
    assert mp.size == 100
    assert mp.chunk_size == 20
    assert mp.completed is False
    assert mp.bucket.size == 100
    assert exists(mp.file.uri)
def test_multipart_creation(app, db, bucket):
    """Test multipart creation."""
    mp = MultipartObject.create(bucket, 'test.txt', 100, 20)
    db.session.commit()
    assert mp.upload_id
    assert mp.size == 100
    assert mp.chunk_size == 20
    assert mp.completed is False
    assert mp.bucket.size == 100
    assert exists(mp.file.uri)
Exemplo n.º 11
0
def test_by_reference_sets_tag(api, users, location, task_delay):
    with api.test_request_context():
        # Assemble a segmented upload from parts, and complete it
        segmented_upload_record: SegmentedUploadRecord = SegmentedUploadRecord.create(
            {}
        )
        multipart_object = MultipartObject.create(
            bucket=segmented_upload_record.bucket,
            key="some-key",
            size=15,
            chunk_size=10,
        )
        Part.create(multipart_object, 0, stream=io.BytesIO(b"abcdefghij"))
        Part.create(multipart_object, 1, stream=io.BytesIO(b"klmno"))
        multipart_object.complete()

        record: SWORDDeposit = SWORDDeposit.create({})
        record.set_by_reference_files(
            [
                ByReferenceFileDefinition(
                    temporary_id=segmented_upload_record.id,
                    content_disposition="attachment; filename=something.txt",
                    content_type="text/plain",
                    packaging=PackagingFormat.Binary,
                    dereference=True,
                ),
            ],
            lambda *args: True,
            "http://localhost/",
        )

        object_version = ObjectVersion.query.one()
        tags = TagManager(object_version)

        assert tags == {
            ObjectTagKey.OriginalDeposit: "true",
            ObjectTagKey.ByReferenceTemporaryID: str(segmented_upload_record.id),
            ObjectTagKey.Packaging: "http://purl.org/net/sword/3.0/package/Binary",
            ObjectTagKey.FileState: FileState.Pending,
            ObjectTagKey.ByReferenceDereference: "true",
            ObjectTagKey.ByReferenceNotDeleted: "true",
        }

        tasks.dereference_object(record.id, object_version.version_id)

        assert object_version.file.storage().open().read() == b"abcdefghijklmno"
def test_multipart_full(app, db, bucket):
    """Test full multipart object."""
    app.config.update(
        dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
        ))

    # Initial parameters
    chunks = 20
    chunk_size = 5 * 1024 * 1024  # 5 MiB
    last_chunk = 1024 * 1024  # 1 MiB
    size = (chunks - 1) * chunk_size + last_chunk

    # Initiate
    mp = MultipartObject.create(bucket,
                                'testfile',
                                size=size,
                                chunk_size=chunk_size)
    db.session.commit()

    # Create parts
    for i in range(chunks):
        part_size = chunk_size if i < chunks - 1 else last_chunk
        Part.create(mp, i, stream=make_stream(part_size))
        db.session.commit()

    # Complete
    mp.complete()
    db.session.commit()

    # Merge parts.
    pre_size = mp.bucket.size
    mp.merge_parts()
    db.session.commit()

    # Test size update
    bucket = Bucket.get(bucket.id)
    assert bucket.size == pre_size

    app.config.update(
        dict(
            FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
            FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
        ))
def test_multipart_full(app, db, bucket):
    """Test full multipart object."""
    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=5 * 1024 * 1024,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=5 * 1024 * 1024 * 1024,
    ))

    # Initial parameters
    chunks = 20
    chunk_size = 5 * 1024 * 1024  # 5 MiB
    last_chunk = 1024 * 1024  # 1 MiB
    size = (chunks - 1) * chunk_size + last_chunk

    # Initiate
    mp = MultipartObject.create(
        bucket, 'testfile', size=size, chunk_size=chunk_size)
    db.session.commit()

    # Create parts
    for i in range(chunks):
        part_size = chunk_size if i < chunks - 1 else last_chunk
        Part.create(mp, i, stream=make_stream(part_size))
        db.session.commit()

    # Complete
    mp.complete()
    db.session.commit()

    # Merge parts.
    pre_size = mp.bucket.size
    mp.merge_parts()
    db.session.commit()

    # Test size update
    bucket = Bucket.get(bucket.id)
    assert bucket.size == pre_size

    app.config.update(dict(
        FILES_REST_MULTIPART_CHUNKSIZE_MIN=2,
        FILES_REST_MULTIPART_CHUNKSIZE_MAX=20,
    ))
Exemplo n.º 14
0
def test_part_creation(app, db, bucket, get_sha256):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    Part.create(mp, 2, stream=BytesIO(b'p'))
    Part.create(mp, 0, stream=BytesIO(b'p1'))
    Part.create(mp, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    m = hashlib.sha256()
    m.update(b'p2')
    assert "sha256:{0}".format(m.hexdigest()) == \
        Part.get_or_none(mp, 1).checksum

    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    assert obj.file.size == 5
    assert obj.file.checksum == get_sha256(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True

    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
def test_part_creation(app, db, bucket, get_md5):
    """Test part creation."""
    assert bucket.size == 0
    mp = MultipartObject.create(bucket, 'test.txt', 5, 2)
    db.session.commit()
    assert bucket.size == 5

    Part.create(mp, 2, stream=BytesIO(b'p'))
    Part.create(mp, 0, stream=BytesIO(b'p1'))
    Part.create(mp, 1, stream=BytesIO(b'p2'))
    db.session.commit()
    assert bucket.size == 5

    mp.complete()
    db.session.commit()
    assert bucket.size == 5

    # Assert checksum of part.
    m = hashlib.md5()
    m.update(b'p2')
    assert "md5:{0}".format(m.hexdigest()) == Part.get_or_none(mp, 1).checksum

    obj = mp.merge_parts()
    db.session.commit()
    assert bucket.size == 5

    assert MultipartObject.query.count() == 0
    assert Part.query.count() == 0

    assert obj.file.size == 5
    assert obj.file.checksum == get_md5(b'p1p2p')
    assert obj.file.storage().open().read() == b'p1p2p'
    assert obj.file.writable is False
    assert obj.file.readable is True

    assert obj.version_id == ObjectVersion.get(bucket, 'test.txt').version_id
Exemplo n.º 16
0
Arquivo: api.py Projeto: mhaya/weko
    def delete(self, force=True, pid=None):
        """Delete deposit.

        Status required: ``'draft'``.

        :param force: Force deposit delete.  (Default: ``True``)
        :param pid: Force pid object.  (Default: ``None``)
        :returns: A new Deposit object.
        """
        # Delete the recid
        recid = PersistentIdentifier.get(pid_type='recid',
                                         pid_value=self.pid.pid_value)

        if recid.status == PIDStatus.RESERVED:
            db.session.delete(recid)

        # if this item has been deleted
        self.delete_es_index_attempt(recid)

        # Completely remove bucket
        bucket = self.files.bucket
        with db.session.begin_nested():
            # Remove Record-Bucket link
            RecordsBuckets.query.filter_by(record_id=self.id).delete()
            mp_q = MultipartObject.query_by_bucket(bucket)
            # Remove multipart objects
            Part.query.filter(
                Part.upload_id.in_(
                    mp_q.with_entities(
                        MultipartObject.upload_id).subquery())).delete(
                            synchronize_session='fetch')
            mp_q.delete(synchronize_session='fetch')
        bucket.locked = False
        bucket.remove()

        return super(Deposit, self).delete()
Exemplo n.º 17
0
def multipart(db, bucket):
    """Multipart object."""
    mp = MultipartObject.create(bucket, 'mykey', 110, 20)
    db.session.commit()
    return mp
Exemplo n.º 18
0
def multipart(db, bucket):
    """Multipart object."""
    mp = MultipartObject.create(bucket, 'mykey', 110, 20)
    db.session.commit()
    return mp
Exemplo n.º 19
0
 def multipart_files(self):
     """Get all multipart files."""
     return MultipartObject.query_by_bucket(self.files.bucket)
Exemplo n.º 20
0
Arquivo: api.py Projeto: zenodo/zenodo
 def multipart_files(self):
     """Get all multipart files."""
     return MultipartObject.query_by_bucket(self.files.bucket)