def test_get_empty_bucket(db, client, headers, bucket, objects, permissions,
                          get_json):
    """Check the bucket endpoint once all fixture objects are deleted.

    The fixture objects are deleted first so that the bucket is empty while
    the permission fixtures remain in place.
    """
    for fixture_obj in objects:
        ObjectVersion.delete(fixture_obj.bucket_id, fixture_obj.key)
    db.session.commit()

    # Pairs of (key into the ``permissions`` fixture, expected HTTP status).
    expectations = (
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - return 403 instead
        ('bucket', 200),
        ('location', 200),
    )

    for user_key, status in expectations:
        login_user(client, permissions[user_key])

        bucket_url = url_for('invenio_files_rest.bucket_api',
                             bucket_id=bucket.id)
        response = client.get(bucket_url, headers=headers)
        assert response.status_code == status
        if response.status_code == 200:
            # An empty bucket must list no contents.
            assert get_json(response)['contents'] == []
示例#2
0
def dispose_object_version(object_version):
    """Delete the given object version; do nothing for a falsy argument.

    :param object_version: Value accepted by ``as_object_version``.
    """
    if not object_version:
        return
    resolved = as_object_version(object_version)
    # Deletion is addressed by bucket and key.
    ObjectVersion.delete(bucket=resolved.bucket, key=resolved.key)
示例#3
0
def test_bucket_sync(app, db, dummy_location):
    """Check ``Bucket.sync`` with and without ``delete_extras``."""
    source = Bucket.create()
    target = Bucket.create()
    initial_objects = (
        (source, "filename1", "b1v11"),
        (source, "filename2", "b1v12"),
        (source, "filename3", "b1v13"),
        (target, "extra1", "b2v11"),
    )
    for owner, key, uri in initial_objects:
        ObjectVersion.create(owner, key).set_location(uri, 1, "achecksum")
    db.session.commit()

    source.sync(target)

    # Plain sync leaves the extra object of the target in place.
    assert ObjectVersion.get_by_bucket(source).count() == 3
    assert ObjectVersion.get_by_bucket(target).count() == 4

    ObjectVersion.delete(source, "filename1")
    for key, uri in (("extra2", "b2v12"), ("extra3", "b2v13")):
        ObjectVersion.create(target, key).set_location(uri, 1, "achecksum")
    ObjectVersion.delete(target, "extra3")
    db.session.commit()

    source.sync(target, delete_extras=True)

    assert ObjectVersion.get_by_bucket(source).count() == 2
    assert ObjectVersion.get_by_bucket(target).count() == 2
示例#4
0
def rename_file(recid, key, new_key):
    """Rename a file in a published record.

    Resolves the record, checks that ``key`` exists and that ``new_key`` is
    not taken, and after interactive confirmation deletes the object version
    under ``key`` and creates one under ``new_key`` reusing the same file id.

    :param recid: Record identifier passed to ``record_resolver.resolve``.
    :param key: Current key of the file in the record's bucket.
    :param new_key: Key the file should have afterwards.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return

    if not click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                         u' Continue?'.format(
                             key=obj.key, new_key=new_key, bucket=bucket.id)):
        click.echo(click.style(u'Aborted file rename.', fg='green'))
        return

    # The bucket is unlocked only for the duration of the rename.
    record.files.bucket.locked = False

    # Remember the file id before deleting so the new key can reuse it.
    file_id = obj.file.id
    ObjectVersion.delete(bucket, obj.key)
    ObjectVersion.create(bucket, new_key, _file_id=file_id)
    record.files.bucket.locked = True
    record.files.flush()
    record.commit()
    db.session.commit()
    click.echo(click.style(u'File renamed successfully.', fg='green'))
示例#5
0
def test_get_empty_bucket(db, client, headers, bucket, objects, permissions,
                          get_json):
    """Test listing a bucket whose fixture objects have all been deleted."""
    # Empty the bucket while keeping the permission fixtures intact.
    for existing in objects:
        ObjectVersion.delete(existing.bucket_id, existing.key)
    db.session.commit()

    # Each entry is (key into ``permissions``, expected HTTP status).
    scenarios = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - return 403 instead
        ('bucket', 200),
        ('location', 200),
    ]

    for who, wanted_status in scenarios:
        login_user(client, permissions[who])

        endpoint = url_for('invenio_files_rest.bucket_api',
                           bucket_id=bucket.id)
        reply = client.get(endpoint, headers=headers)
        assert reply.status_code == wanted_status
        if reply.status_code != 200:
            continue
        assert get_json(reply)['contents'] == []
示例#6
0
文件: cli.py 项目: xbee/zenodo
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    Prints a summary of the file, the target record and (when replacing) the
    file that will be removed, then asks for confirmation before touching the
    bucket.

    :param recid: Record identifier passed to ``record_resolver.resolve``.
    :param fp: Open, seekable file object; its ``name`` provides the key.
    :param replace_existing: When true, an existing file with the same key is
        deleted before the new one is created.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(
            click.style(
                u'File with key "{key}" already exists.'
                u' Use `--replace-existing/-f` to overwrite it.'.format(
                    key=key),
                fg='red'))
        return

    # Measure the stream: jump to the end, read the offset, then rewind.
    fp.seek(0, SEEK_END)
    size = fp.tell()
    fp.seek(0, SEEK_SET)

    click.echo(u'Will add the following file:\n')
    click.echo(
        click.style(u'  key: "{key}"\n'
                    u'  bucket: {bucket}\n'
                    u'  size: {size}\n'
                    u''.format(key=key, bucket=bucket.id, size=size),
                    fg='green'))
    click.echo(u'to record:\n')
    click.echo(
        click.style(u'  Title: "{title}"\n'
                    u'  RECID: {recid}\n'
                    u'  UUID: {uuid}\n'
                    u''.format(recid=record['recid'],
                               title=record['title'],
                               uuid=record.id),
                    fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(
            click.style(u'  key: "{key}"\n'
                        u'  bucket: {bucket}\n'
                        u'  size: {size}\n'
                        u''.format(key=obj.key,
                                   bucket=obj.bucket,
                                   size=obj.file.size),
                        fg='green'))

    if not click.confirm(u'Continue?'):
        click.echo(click.style(u'File addition aborted.', fg='green'))
        return

    # The bucket is unlocked only while the object versions are modified.
    bucket.locked = False
    if obj is not None and replace_existing:
        ObjectVersion.delete(bucket, obj.key)
    ObjectVersion.create(bucket, key, stream=fp, size=size)
    bucket.locked = True

    record.files.flush()
    record.commit()
    db.session.commit()
    click.echo(click.style(u'File added successfully.', fg='green'))
示例#7
0
def test_sync_records_with_deposits(app, db, location, users,
                                    project_deposit_metadata,
                                    video_deposit_metadata):
    """Test the task that syncs record files with their deposit."""
    # A project first, then a video attached to it.
    project = Project.create(project_deposit_metadata)
    project_deposit_metadata['report_number'] = ['123']
    video_deposit_metadata['_project_id'] = project['_deposit']['id']
    deposit = Video.create(video_deposit_metadata)
    depid = deposit['_deposit']['id']

    # Populate the deposit bucket; obj_4 is kept so it can be removed later.
    seed_objects = (
        ("obj_1", "mylocation1", "mychecksum1"),
        ("obj_2", "mylocation2", "mychecksum2"),
        ("obj_3", "mylocation3", "mychecksum3"),
    )
    for key, uri, checksum in seed_objects:
        ObjectVersion.create(deposit.files.bucket, key).set_location(
            uri, 1, checksum)
    created = ObjectVersion.create(deposit.files.bucket, "obj_4")
    obj_4 = created.set_location("mylocation4", 1, "mychecksum4")

    # Publish the video.
    login_user(User.query.get(users[0]))
    prepare_videos_for_publish([deposit])
    deposit = deposit.publish()
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # Create "obj_new" and then a second version under the same key.
    new_versions = (
        ("mylocation_new", "mychecksum"),
        ("mylocation2.1", "mychecksum2.1"),
    )
    for uri, checksum in new_versions:
        ObjectVersion.create(deposit.files.bucket, "obj_new").set_location(
            uri, 1, checksum)
    # Delete obj_3 and remove obj_4.
    ObjectVersion.delete(deposit.files.bucket, "obj_3")
    obj_4.remove()

    # Deposit and record are expected to differ at this point.
    files = ['obj_1', 'obj_2', 'obj_3', 'obj_4']
    edited_files = ['obj_1', 'obj_2', 'obj_3', 'obj_new']
    check_deposit_record_files(deposit, edited_files, record, files)

    # Run the sync task for this deposit.
    sync_task = sync_records_with_deposit_files.s(deposit_id=depid)
    sync_task.apply_async()

    # Reload deposit and record.
    deposit = deposit_video_resolver(depid)
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # Check the file lists of deposit and record after the sync.
    re_edited_files = edited_files + ['obj_4']
    check_deposit_record_files(deposit, edited_files, record, re_edited_files)
示例#8
0
def test_deposit_prepare_edit(api_app, db, location, project_deposit_metadata):
    """Test the publish / edit / publish cycle of a deposit with files."""
    # A fresh deposit is neither published nor linked to a record.
    project_deposit_metadata['report_number'] = ['123']
    deposit = CDSDeposit.create(project_deposit_metadata,
                                bucket_location=location.name)
    assert deposit.is_published() is False
    assert deposit.has_record() is False

    # Populate the deposit bucket; obj_4 is kept so it can be removed later.
    seed_objects = (
        ("obj_1", "mylocation1", "mychecksum1"),
        ("obj_2", "mylocation2", "mychecksum2"),
        ("obj_3", "mylocation3", "mychecksum3"),
    )
    for key, uri, checksum in seed_objects:
        ObjectVersion.create(deposit.files.bucket, key).set_location(
            uri, 1, checksum)
    created = ObjectVersion.create(deposit.files.bucket, "obj_4")
    obj_4 = created.set_location("mylocation4", 1, "mychecksum4")

    # First publication.
    deposit = deposit.publish()
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True
    assert deposit.has_record() is True
    # Deposit and record expose the same files right after publishing.
    files = ['obj_1', 'obj_2', 'obj_3', 'obj_4']
    check_deposit_record_files(deposit, files, record, files)

    # Create "obj_new" and then a second version under the same key.
    new_versions = (
        ("mylocation_new", "mychecksum"),
        ("mylocation2.1", "mychecksum2.1"),
    )
    for uri, checksum in new_versions:
        ObjectVersion.create(deposit.files.bucket, "obj_new").set_location(
            uri, 1, checksum)
    # Delete obj_3 and remove obj_4.
    ObjectVersion.delete(deposit.files.bucket, "obj_3")
    obj_4.remove()

    # Check deposit and record against their expected file lists.
    edited_files = ['obj_1', 'obj_2', 'obj_3', 'obj_new']
    check_deposit_record_files(deposit, edited_files, record, files)

    # Switch the deposit back to edit mode.
    deposit = deposit.edit()
    assert deposit.is_published() is False
    assert deposit.has_record() is True
    # Same expected file lists as before the edit.
    check_deposit_record_files(deposit, edited_files, record, files)

    # Second publication.
    deposit = deposit.publish()
    assert deposit.is_published() is True
    assert deposit.has_record() is True
    # Check the file lists of deposit and record after re-publishing.
    re_edited_files = edited_files + ['obj_4']
    check_deposit_record_files(deposit, edited_files, record, re_edited_files)
示例#9
0
def delete_object_version(obj: ObjectVersion):
    """Delete an object version, clean up its file instance and commit.

    :param obj: Object version to delete; its bucket and key address the
        deletion.
    """
    message = f"Delete Object Version: {str(obj)}"
    current_app.logger.debug(message)

    # Delete the object version, addressed by its own bucket and key.
    bucket, key = obj.bucket, obj.key
    obj.delete(bucket, key)

    delete_file_instance(obj)

    db.session.commit()
def test_deposit_vtt_tags(api_app, db, api_project, users):
    """Test the tags set on VTT files across publish / edit cycles."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    # Tags shared by every subtitle file checked below.
    subtitle_tags = dict(content_type='vtt', media_type='subtitle',
                         context_type='subtitle')

    # A master file is added to the video first.
    add_master_to_video(
        video_deposit=video_1,
        filename='test.mp4',
        stream=BytesIO(b'1234'), video_duration="15"
    )
    # French subtitles, added before the first publication.
    french = ObjectVersion.create(
        video_1._bucket, key="test_fr.vtt", stream=BytesIO(b'hello'))
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    check_object_tags(french, video_1, language='fr', **subtitle_tags)

    # Edit: replace the French subtitles with English ones.
    video_1 = video_1.edit()
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=french.key)
    english = ObjectVersion.create(
        video_1._bucket, key="test_en.vtt", stream=BytesIO(b'hello'))

    video_1 = video_1.publish()

    check_object_tags(english, video_1, language='en', **subtitle_tags)

    # Edit the re-published video and add Italian subtitles.
    video_1 = video_1.edit()
    italian = ObjectVersion.create(
        video_1._bucket, key="test_it.vtt", stream=BytesIO(b'hello'))

    video_1 = video_1.publish()

    check_object_tags(italian, video_1, language='it', **subtitle_tags)
示例#11
0
def object_version(db, bucket):
    """Yield an object version named ``test.pdf`` and delete it afterwards."""
    payload = b'some content'
    stream = BytesIO(payload)
    created = ObjectVersion.create(
        bucket, 'test.pdf', stream=stream, size=len(payload))
    db.session.commit()

    yield created

    # Runs after the consumer is done with the yielded object.
    ObjectVersion.delete(bucket, created.key)
    db.session.commit()
示例#12
0
    def rename(self, old_key, new_key):
        """Rename the file stored under ``old_key`` to ``new_key``.

        :param old_key: Existing key of the file.
        :param new_key: New key; must not be present in this collection.
        :returns: The object version created under ``new_key``.
        """
        assert new_key not in self

        source_file = self[old_key]
        # The new object version reuses the file id of the old one.
        renamed = ObjectVersion.create(
            bucket=self.bucket,
            key=new_key,
            _file_id=source_file.obj.file_id,
        )
        position = self.keys.index(old_key)
        self.record['_files'][position]['key'] = new_key
        # The old key is deleted only after the new version exists.
        ObjectVersion.delete(bucket=self.bucket, key=old_key)
        return renamed
示例#13
0
def files():
    """Reset the data directory and tables, then load demo buckets."""
    srcroot = dirname(dirname(__file__))
    datadir = current_app.config['DATADIR']
    if exists(datadir):
        shutil.rmtree(datadir)
    makedirs(datadir)

    # Clear existing rows, in this order.
    models = (Part, MultipartObject, ObjectVersion, Bucket, FileInstance,
              Location)
    for model in models:
        model.query.delete()
    db.session.commit()

    # Default location pointing at the data directory.
    loc = Location(name='local', uri=datadir, default=True)
    db.session.add(loc)
    db.session.commit()

    # First bucket: one object per source file.
    first = Bucket.create(loc)
    first.id = '00000000-0000-0000-0000-000000000000'
    for name in ['README.rst', 'LICENSE']:
        with open(join(srcroot, name), 'rb') as fp:
            ObjectVersion.create(first, name, stream=fp)

    # Second bucket: two versions per key, given as (key, source file).
    second = Bucket.create(loc)
    second.id = '11111111-1111-1111-1111-111111111111'
    versions = (
        ('AUTHORS.rst', 'CHANGES.rst'),
        ('AUTHORS.rst', 'AUTHORS.rst'),
        ('RELEASE-NOTES.rst', 'RELEASE-NOTES.rst'),
        ('RELEASE-NOTES.rst', 'CHANGES.rst'),
    )
    for key, source_name in versions:
        with open(join(srcroot, source_name), 'rb') as fp:
            ObjectVersion.create(second, key, stream=fp)
    ObjectVersion.delete(second.id, 'RELEASE-NOTES.rst')

    # Third bucket: left without objects.
    third = Bucket.create(loc)
    third.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
示例#14
0
    def rename(self, old_key, new_key):
        """Give the file under ``old_key`` the key ``new_key``.

        :param old_key: Key the file currently has.
        :param new_key: Key to assign; asserted to be absent beforehand.
        :returns: The newly created object version.
        """
        assert new_key not in self

        current = self[old_key]
        # Create the version under the new name, sharing the same file id.
        file_id = current.obj.file_id
        new_version = ObjectVersion.create(
            bucket=self.bucket, key=new_key, _file_id=file_id)
        entry_index = self.keys.index(old_key)
        self.record['_files'][entry_index]['key'] = new_key
        # Finally delete the version stored under the old name.
        ObjectVersion.delete(bucket=self.bucket, key=old_key)
        return new_version
示例#15
0
def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test the tags set on poster images across a publish / edit cycle."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'
    # Tags shared by both poster files checked below.
    poster_tags = dict(context_type='poster', media_type='image')

    # A master file is added to the video first.
    add_master_to_video(video_deposit=video_1,
                        filename=master_video_filename,
                        stream=BytesIO(b'1234'),
                        video_duration='15')
    # First poster, added before publication.
    jpg_poster = ObjectVersion.create(
        video_1._bucket, key=poster_filename, stream=BytesIO(b'hello'))
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    check_object_tags(jpg_poster, video_1, content_type='jpg', **poster_tags)

    # Edit: replace the first poster with a second one.
    video_1 = video_1.edit()
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=jpg_poster.key)
    png_poster = ObjectVersion.create(
        video_1._bucket, key=poster_filename2, stream=BytesIO(b'hello'))

    video_1 = video_1.publish()

    check_object_tags(png_poster, video_1, content_type='png', **poster_tags)
示例#16
0
def files():
    """Reset the data directory and tables, then load demo buckets.

    Removes and recreates the directory configured as ``DATADIR``, clears the
    file-related tables, creates a default location and fills three buckets
    with fixed ids from files found in the source root.
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location. It has to be added to the session, otherwise the
    # commit below does not persist it.
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: two versions of each key
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)

    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: left without objects
    b3 = Bucket.create(loc)
    b3.id = '22222222-2222-2222-2222-222222222222'

    db.session.commit()
示例#17
0
def test_object_restore(app, db, dummy_location):
    """Test restoring an earlier version of an object."""
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    for instance in (f1, f2):
        db.session.add(instance)
    b1 = Bucket.create()

    # Two versions under the same key, followed by a deletion.
    first_version = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Cannot restore a deleted version.
    with pytest.raises(InvalidOperationError):
        obj_deleted.restore()

    # Restoring the first version adds a new row.
    restored = first_version.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert restored.is_head is True
    assert restored.version_id != first_version.version_id
    assert restored.key == first_version.key
    assert restored.file_id == first_version.file_id
    assert restored.bucket == first_version.bucket
示例#18
0
def test_object_delete(app, db, dummy_location):
    """Test object deletion through a delete marker."""
    # Three versions are created, the latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        for uri in ("b1test1", "b1test2"):
            ObjectVersion.create(b1, "test").set_location(
                uri, 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    # The marker is still reachable through its explicit version id.
    marker = ObjectVersion.get(
        b1, "test", version_id=obj_deleted.version_id)
    assert marker.is_deleted
    assert marker.file_id is None

    # Creating a new version makes the key visible again.
    ObjectVersion.create(b1, "test").set_location("b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
示例#19
0
def test_object_restore(app, db, dummy_location):
    """Check that an old version can be restored but a deleted one cannot."""
    file_one = FileInstance(uri="f1", size=1, checksum="mychecksum")
    file_two = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(file_one)
    db.session.add(file_two)
    bucket = Bucket.create()

    original = ObjectVersion.create(bucket, "test").set_file(file_one)
    ObjectVersion.create(bucket, "test").set_file(file_two)
    deleted = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3
    # Restoring the deleted version must be rejected.
    with pytest.raises(InvalidOperationError):
        deleted.restore()

    # Restore the very first version.
    obj_new = original.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != original.version_id
    # Key, file and bucket are carried over from the restored version.
    assert obj_new.key == original.key
    assert obj_new.file_id == original.file_id
    assert obj_new.bucket == original.bucket
示例#20
0
def test_object_delete(app, db, dummy_location):
    """Check that deleting a key hides it until a new version is created."""
    # Build three versions; the last one is a delete marker.
    with db.session.begin_nested():
        bucket = Bucket.create()
        ObjectVersion.create(bucket, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(bucket, "test").set_location(
            "b1test2", 1, "achecksum")
        delete_marker = ObjectVersion.delete(bucket, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(bucket, "test") is None
    assert ObjectVersion.get_by_bucket(bucket).count() == 0

    fetched = ObjectVersion.get(
        bucket, "test", version_id=delete_marker.version_id)
    assert fetched.deleted
    assert fetched.file_id is None

    ObjectVersion.create(bucket, "test").set_location(
        "b1test4", 1, "achecksum")

    # Lookups by bucket id see the newly created version.
    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(bucket.id, "test") is not None
    assert ObjectVersion.get_by_bucket(bucket.id).count() == 1
示例#21
0
 def __delitem__(self, key):
     """Delete the file stored under ``key``.

     :raises KeyError: If ``ObjectVersion.delete`` returns ``None``.
     """
     marker = ObjectVersion.delete(bucket=self.bucket, key=key)
     # The record's file list is rewritten before the result is checked.
     remaining = []
     for entry in self.record['_files']:
         if entry['key'] != key:
             remaining.append(entry)
     self.record['_files'] = remaining
     if marker is None:
         raise KeyError(key)
示例#22
0
def sync_buckets(src_bucket, dest_bucket, delete_extras=False):
    """Mirror the head ObjectVersions of a source bucket into a destination.

    Rules applied per key:

        * heads sharing the same file in both buckets are left alone
        * heads missing or differing in the destination are copied over
        * heads deleted in the source are deleted in the destination
        * heads existing only in the destination are deleted when
          ``delete_extras`` is True

    :param src_bucket: Source bucket.
    :param dest_bucket: Destination bucket; asserted to be unlocked.
    :param delete_extras: Delete extra ObjectVersions in destination if True.
    :returns: ``dest_bucket``.
    """
    assert not dest_bucket.locked

    def _heads_by_key(bucket):
        # Map of { key: head object version } for one bucket.
        heads = ObjectVersion.query.filter(
            ObjectVersion.bucket_id == bucket.id,
            ObjectVersion.is_head.is_(True)
        ).all()
        return {head.key: head for head in heads}

    src_keys = _heads_by_key(src_bucket)
    dest_keys = _heads_by_key(dest_bucket)

    for key, src_ov in src_keys.items():
        if src_ov.deleted:
            # Propagate the deletion only when the destination has a live head.
            if key in dest_keys and not dest_keys[key].deleted:
                ObjectVersion.delete(dest_bucket, key)
            continue
        if key not in dest_keys or src_ov.file_id != dest_keys[key].file_id:
            src_ov.copy(bucket=dest_bucket)

    if delete_extras:
        for key in dest_keys:
            if key not in src_keys:
                ObjectVersion.delete(dest_bucket, key)

    return dest_bucket
    def delete(self, key):
        """Delete the file stored under ``key`` and return its entry.

        :param key: Key of the file to delete.
        :returns: The entry that was looked up under ``key``.
        """
        record_file = self[key]
        version = record_file.object_version
        # The row is deleted with ``force=True``.
        record_file.delete(force=True)
        if version:
            # TODO: Should we also remove the FileInstance? Configurable?
            ObjectVersion.delete(version.bucket, key)
        del self._entries[key]

        # A deleted file can no longer be the default preview.
        if self.default_preview == key:
            self.default_preview = None
        if key in self._order:
            self._order.remove(key)
        return record_file
示例#24
0
def delete_file(bucket_id, key=''):
    """Delete the object stored under ``key`` in a bucket.

    :param bucket_id: Bucket passed to ``ObjectVersion.delete``.
    :param key: Key of the object to delete. Defaults to the empty string,
        which was previously hard-coded.
    :returns: A JSON ``{"status": "ok"}`` response when
        ``ObjectVersion.delete`` returns a truthy value, otherwise a JSON
        error body together with status code 404.
    """
    deleted_file = ObjectVersion.delete(bucket_id, key)

    if not deleted_file:
        return jsonify({"error": "not found"}), 404

    return jsonify({"status": "ok"})
示例#25
0
 def __delitem__(self, key):
     """Remove the file under ``key`` from the bucket and the record.

     :raises KeyError: If ``ObjectVersion.delete`` returns ``None``.
     """
     deleted = ObjectVersion.delete(bucket=self.bucket, key=key)
     # Keep every file entry whose key differs from the deleted one.
     kept = [entry for entry in self.record['_files']
             if not entry['key'] == key]
     self.record['_files'] = kept
     if deleted is None:
         raise KeyError(key)
示例#26
0
def test_object_snapshot(app, db, dummy_location):
    """Test that a bucket snapshot copies only the current objects."""
    def _put(target, key, uri):
        # One new version of ``key`` located at ``uri``.
        ObjectVersion.create(target, key).set_location(uri, 1, "achecksum")

    b1 = Bucket.create()
    b2 = Bucket.create()
    _put(b1, "versioned", "b1v1")
    _put(b1, "versioned", "b1v2")
    _put(b1, "deleted", "b1d1")
    ObjectVersion.delete(b1, "deleted")
    _put(b1, "undeleted", "b1u1")
    ObjectVersion.delete(b1, "undeleted")
    _put(b1, "undeleted", "b1u2")
    _put(b1, "simple", "b1s1")
    _put(b2, "another", "b2a1")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # The 'undeleted' key must have exactly one HEAD, the latest version.
    undeleted_versions = ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all()
    heads = []
    for version in undeleted_versions:
        if version.is_head:
            heads.append(version)
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Only the snapshot is locked, as requested.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
    def __delitem__(self, key):
        """Delete the file stored under ``key``.

        :raises KeyError: If ``ObjectVersion.delete`` returns ``None``.
        """
        if ObjectVersion.delete(bucket=self.bucket, key=key) is None:
            raise KeyError(key)

        # Nothing to update when the key is not in the files map.
        if key not in self.filesmap:
            return
        del self.filesmap[key]
        self.flush()
示例#28
0
def test_object_remove_marker(app, db, bucket, objects):
    """Test removing the version returned by a deletion."""
    target = objects[0]
    assert ObjectVersion.query.count() == 4
    # Deleting adds one more row.
    marker = ObjectVersion.delete(target.bucket, target.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5
    # Fetch that version again by its id and remove it.
    fetched = ObjectVersion.get(
        marker.bucket, marker.key, version_id=marker.version_id)
    fetched.remove()
    assert ObjectVersion.query.count() == 4
示例#29
0
    def __delitem__(self, key):
        """Remove the file under ``key`` from the bucket and the files map.

        :raises KeyError: If ``ObjectVersion.delete`` returns ``None``.
        """
        deleted = ObjectVersion.delete(bucket=self.bucket, key=key)
        if deleted is None:
            raise KeyError(key)

        tracked = key in self.filesmap
        if tracked:
            # Drop the entry and flush the change.
            del self.filesmap[key]
            self.flush()
示例#30
0
def test_object_remove_marker(app, db, bucket, objects):
    """Check the row count around deleting an object and removing the result."""
    first = objects[0]
    assert ObjectVersion.query.count() == 4

    deleted = ObjectVersion.delete(first.bucket, first.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5

    # Look the returned version up by its version id before removing it.
    lookup = dict(version_id=deleted.version_id)
    reloaded = ObjectVersion.get(deleted.bucket, deleted.key, **lookup)
    reloaded.remove()
    assert ObjectVersion.query.count() == 4
示例#31
0
def test_object_snapshot(app, db, dummy_location):
    """Check object, file and bucket counts before and after a snapshot."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    # Ordered script of ("create", bucket, key, uri) / ("delete", bucket, key).
    steps = (
        ("create", b1, "versioned", "b1v1"),
        ("create", b1, "versioned", "b1v2"),
        ("create", b1, "deleted", "b1d1"),
        ("delete", b1, "deleted"),
        ("create", b1, "undeleted", "b1u1"),
        ("delete", b1, "undeleted"),
        ("create", b1, "undeleted", "b1u2"),
        ("create", b1, "simple", "b1s1"),
        ("create", b2, "another", "b2a1"),
    )
    for action, target, key, *uri in steps:
        if action == "delete":
            ObjectVersion.delete(target, key)
        else:
            ObjectVersion.create(target, key).set_location(
                uri[0], 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # Only one HEAD may exist for the 'undeleted' key.
    candidates = ObjectVersion.query.filter_by(
        bucket_id=b1.id, key='undeleted').all()
    heads = list(filter(lambda version: version.is_head, candidates))
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    snapshot = b1.snapshot(lock=True)
    db.session.commit()

    # The source stays unlocked; the snapshot is locked as requested.
    assert b1.locked is False
    assert snapshot.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(snapshot).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(snapshot, versions=True).count() == 3
示例#32
0
def remove_file(recid, key=None, index=None):
    """Remove a file from a published record.

    Resolves ``recid`` to a record, looks ``key`` up in the record's bucket,
    prints a summary of the file and the record, and asks for confirmation.
    On confirmation the bucket is unlocked, its size is reduced by the
    file's size, the object is deleted through ``ObjectVersion.delete``, the
    bucket is locked again and the record and session are committed.

    :param recid: Record identifier handed to ``record_resolver.resolve``.
    :param key: Key of the file inside the record's bucket. May be a byte
        string or a text string.
    :param index: Not used by this function.
    :returns: ``None``.
    """
    _pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    # Only byte strings need decoding. Text strings have no ``decode`` on
    # Python 3, so calling it unconditionally raised AttributeError there.
    display_key = key.decode('utf-8') if isinstance(key, bytes) else key

    click.echo(u'Will remove the following file:\n')
    click.echo(click.style(
        u'  key: "{key}"\n'
        u'  {checksum}\n'
        u'  bucket: {bucket}\n'
        u''.format(
            key=display_key,
            checksum=obj.file.checksum,
            bucket=bucket.id),
        fg='green'))
    click.echo('from record:\n')
    click.echo(click.style(
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id),
        fg='green'))

    if click.confirm(u'Continue?'):
        # The bucket is unlocked only for the duration of the change.
        bucket.locked = False
        bucket.size -= obj.file.size
        ObjectVersion.delete(bucket, obj.key)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File removed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file removal.', fg='green'))
示例#33
0
    def delete(self, key, remove_obj=True, softdelete_obj=False):
        """Delete a file entry and, optionally, its object version.

        :param key: Key of the file entry to delete.
        :param remove_obj: When true, call ``remove()`` on the entry's
            object version. Takes precedence over ``softdelete_obj``.
        :param softdelete_obj: When true and ``remove_obj`` is false, call
            ``ObjectVersion.delete`` for the object version's bucket and key
            instead.
        :returns: The deleted file entry.
        """
        rf = self[key]
        # Capture the object version before the row is deleted.
        ov = rf.object_version
        # Delete the entire row
        rf.delete(force=True)
        if ov:
            # The soft-delete branch used to be nested under
            # ``if ov and remove_obj`` and could never run.
            if remove_obj:
                ov.remove()
            elif softdelete_obj:
                ObjectVersion.delete(ov.bucket, ov.key)
        del self._entries[key]

        # Unset the default preview if the file is removed
        if self.default_preview == key:
            self.default_preview = None
        if key in self._order:
            self._order.remove(key)
        return rf
示例#34
0
文件: cli.py 项目: lnielsen/zenodo
def remove_file(recid, key=None, index=None):
    """Remove a file from a published record.

    Looks ``key`` up in the bucket of the record resolved from ``recid``,
    shows what is about to be removed, and asks for confirmation. When
    confirmed, the bucket is unlocked, the object is deleted through
    ``ObjectVersion.delete``, the bucket is locked again and the record and
    session are committed.

    :param recid: Record identifier handed to ``record_resolver.resolve``.
    :param key: Key of the file inside the record's bucket.
    :param index: Not used by this function.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        not_found = u'File with key "{key}" not found.'.format(
            key=key, recid=recid)
        click.echo(click.style(not_found, fg='red'))
        return

    click.echo(u'Will remove the following file:\n')
    file_summary = (
        u'  key: "{key}"\n'
        u'  {checksum}\n'
        u'  bucket: {bucket}\n'
        u''
    ).format(key=key, checksum=obj.file.checksum, bucket=bucket.id)
    click.echo(click.style(file_summary, fg='green'))

    click.echo('from record:\n')
    record_summary = (
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''
    ).format(recid=record['recid'], title=record['title'], uuid=record.id)
    click.echo(click.style(record_summary, fg='green'))

    # Bail out early when the operator declines.
    if not click.confirm(u'Continue?'):
        click.echo(click.style(u'Aborted file removal.', fg='green'))
        return

    # The bucket is unlocked only while the object is being deleted.
    bucket.locked = False
    ObjectVersion.delete(bucket, obj.key)
    bucket.locked = True

    record.files.flush()
    record.commit()
    db.session.commit()
    click.echo(click.style(u'File removed successfully.', fg='green'))
示例#35
0
def test_bucket_sync_deleted_object(app, db, dummy_location):
    """Check that syncing propagates a deletion from source to destination."""
    src = Bucket.create()
    dest = Bucket.create()

    def visible(bucket):
        """Return how many objects ``get_by_bucket`` lists for ``bucket``."""
        return ObjectVersion.get_by_bucket(bucket).count()

    # "filename" exists in both buckets but is then deleted in the source;
    # "extra-deleted" exists only in the destination.
    ObjectVersion.create(src, "filename").set_location(
        "b1v1", 1, "achecksum")
    ObjectVersion.create(dest, "filename").set_location(
        "b2v1", 1, "achecksum")
    ObjectVersion.create(dest, "extra-deleted").set_location(
        "b3v1", 1, "asum")
    ObjectVersion.delete(src, "filename")
    db.session.commit()

    src.sync(dest)

    # The deletion was mirrored; the destination-only object is kept.
    assert visible(src) == 0
    assert visible(dest) == 1
    assert ObjectVersion.get(dest, "extra-deleted")

    ObjectVersion.delete(dest, "extra-deleted")
    db.session.commit()

    src.sync(dest)

    # A second sync leaves both buckets empty.
    assert visible(src) == 0
    assert visible(dest) == 0
示例#36
0
def test_object_snapshot(app, db, dummy_location):
    """Check object, file and bucket counts before and after a snapshot."""
    origin, unrelated = Bucket.create(), Bucket.create()

    def put(bucket, key, uri):
        """Create a new version of ``key`` in ``bucket`` located at ``uri``."""
        ObjectVersion.create(bucket, key).set_location(uri, 1, "achecksum")

    # A key with two versions.
    put(origin, "versioned", "b1v1")
    put(origin, "versioned", "b1v2")
    # A key that is created and then deleted.
    put(origin, "deleted", "b1d1")
    ObjectVersion.delete(origin, "deleted")
    # A key that is deleted and then created again.
    put(origin, "undeleted", "b1u1")
    ObjectVersion.delete(origin, "undeleted")
    put(origin, "undeleted", "b1u2")
    # Plain keys, one per bucket.
    put(origin, "simple", "b1s1")
    put(unrelated, "another", "b2a1")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(origin).count() == 3
    assert ObjectVersion.get_by_bucket(unrelated).count() == 1

    copy = origin.snapshot(lock=True)
    db.session.commit()

    # The lock applies to the snapshot, not to the bucket it came from.
    assert origin.locked is False
    assert copy.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(origin).count() == 3
    assert ObjectVersion.get_by_bucket(unrelated).count() == 1
    assert ObjectVersion.get_by_bucket(copy).count() == 3
    assert ObjectVersion.get_by_bucket(origin, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(copy, versions=True).count() == 3
示例#37
0
def test_object_snapshot(app, db, dummy_location):
    """Verify what a locked snapshot of a bucket contains."""
    first = Bucket.create()
    second = Bucket.create()

    def listed(bucket, **options):
        """Count the objects ``get_by_bucket`` returns for ``bucket``."""
        return ObjectVersion.get_by_bucket(bucket, **options).count()

    checksum = "achecksum"
    ObjectVersion.create(first, "versioned").set_location("b1v1", 1, checksum)
    ObjectVersion.create(first, "versioned").set_location("b1v2", 1, checksum)
    ObjectVersion.create(first, "deleted").set_location("b1d1", 1, checksum)
    ObjectVersion.delete(first, "deleted")
    ObjectVersion.create(first, "undeleted").set_location("b1u1", 1, checksum)
    ObjectVersion.delete(first, "undeleted")
    ObjectVersion.create(first, "undeleted").set_location("b1u2", 1, checksum)
    ObjectVersion.create(first, "simple").set_location("b1s1", 1, checksum)
    ObjectVersion.create(second, "another").set_location("b2a1", 1, checksum)
    db.session.commit()

    # Baseline before snapshotting.
    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert listed(first) == 3
    assert listed(second) == 1

    snapshot = first.snapshot(lock=True)
    db.session.commit()

    # ``lock=True`` locks the new bucket only.
    assert first.locked is False
    assert snapshot.locked is True

    # One more bucket and three more object versions; files are unchanged.
    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert listed(first) == 3
    assert listed(second) == 1
    assert listed(snapshot) == 3
    assert listed(first, versions=True) == 8
    assert listed(snapshot, versions=True) == 3
示例#38
0
def test_dereference_when_not_head(api, location, es,
                                   httpserver: pytest_httpserver.HTTPServer):
    """Dereference a by-reference object version that is no longer head.

    The task is expected to return the file's key while the HTTP server
    log stays empty.
    """
    file_key = "some-file.txt"

    with api.test_request_context():
        record = SWORDDeposit.create({})

        object_version = ObjectVersion.create(
            bucket=record.bucket,
            key=file_key,
            stream=io.BytesIO(b"data"),
        )

        tag_manager = TagManager(object_version)
        by_reference_tags = {
            ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
            ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
        }
        tag_manager.update(by_reference_tags)

        # Deleting the key makes the version created above a non-head one.
        ObjectVersion.delete(record.bucket, object_version.key)

        httpserver.expect_request("/some-file.txt").respond_with_data(b"data")

        db.session.refresh(object_version)

        dereferenced = tasks.dereference_object(
            record.id, object_version.version_id)
        assert dereferenced == ["some-file.txt"]
        assert httpserver.log == []
示例#39
0
    def commit(self):
        """Store changes on current instance in database.

        This method extends the default implementation by publishing the
        deposition when 'publication_state' is set to 'published'.

        Steps, in order:

        1. When the record contains an ``external_pids`` key, reconcile the
           objects of the linked bucket with that list (delete objects whose
           key is no longer listed, update changed URIs), call
           ``create_b2safe_file`` and remove the key from the record.
        2. Fail when the record has no model or the model has no JSON.
        3. Set ``open_access`` to ``False`` when the record has an embargo
           date, is marked open access and ``is_under_embargo`` is true.
        4. When a ``community`` is set, run that community's publication
           workflow on the record.
        5. Publish the record if it is entering the published state,
           otherwise perform a plain commit.

        :returns: ``self``.
        :raises MissingModelError: if the model or its JSON is missing.
        :raises InvalidDepositError: if the record's community does not
            exist.
        """
        if 'external_pids' in self:
            deposit_id = self['_deposit']['id']
            # NOTE(review): ``first()`` returns None when nothing matches,
            # which would fail on the attribute access below - confirm a
            # match is guaranteed here.
            recid = PersistentIdentifier.query.filter_by(
                pid_value=deposit_id).first()
            # NOTE(review): 'R' is presumably the "reserved" PID status -
            # confirm. This check disappears when Python runs with -O.
            assert recid.status == 'R'
            # Bucket linked to the record identified by the PID value.
            record_bucket = RecordsBuckets.query.filter_by(
                record_id=recid.pid_value).first()
            bucket = Bucket.query.filter_by(id=record_bucket.bucket_id).first()
            object_versions = ObjectVersion.query.filter_by(
                bucket_id=bucket.id).all()
            # Map each listed file key to its ePIC PID.
            key_to_pid = {
                ext_pid.get('key'): ext_pid.get('ePIC_PID')
                for ext_pid in self['external_pids']
            }
            # for the existing files
            for object_version in object_versions:
                # Skip versions without a file and files whose storage
                # class is not 'B' (presumably the external/B2SAFE class -
                # confirm).
                if object_version.file is None or \
                        object_version.file.storage_class != 'B':
                    continue
                # check that they are still in the file pids list or remove
                if object_version.key not in key_to_pid:
                    ObjectVersion.delete(bucket,
                                         object_version.key)
                # check that the uri is still the same or update it
                elif object_version.file.uri != \
                        key_to_pid[object_version.key]:
                    db.session.query(FileInstance).\
                        filter(FileInstance.id == object_version.file_id).\
                        update({"uri": key_to_pid[object_version.key]})
            create_b2safe_file(self['external_pids'], bucket)
            # The list has been processed; it is not kept on the record.
            del self['external_pids']

        if self.model is None or self.model.json is None:
            raise MissingModelError()

        # automatically make embargoed records private
        if self.get('embargo_date') and self.get('open_access'):
            if is_under_embargo(self):
                self['open_access'] = False

        if 'community' in self:
            try:
                community = Community.get(self['community'])
            except CommunityDoesNotExistError as e:
                raise InvalidDepositError('Community {} does not exist.'.format(
                    self['community'])) from e
            # Look up the workflow callable by the community's setting and
            # apply it to this record.
            workflow = publication_workflows[community.publication_workflow]
            workflow(self.model, self)

        # publish the deposition if needed
        if (self['publication_state'] == PublicationStates.published.name
                # check invenio-deposit status so that we do not loop
                and self['_deposit']['status'] != PublicationStates.published.name):

            # Retrieve previous version in order to reindex it later.
            previous_version_pid = None
            # Save the previous "last" version for later use
            # (``previous_version_uuid`` is only bound inside this branch;
            # it is read below only when ``previous_version_pid`` is set.)
            if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                    self.versioning.has_children:
                previous_version_pid = self.versioning.last_child
                previous_version_uuid = str(RecordUUIDProvider.get(
                    previous_version_pid.pid_value
                ).pid.object_uuid)
            external_pids = generate_external_pids(self)
            if external_pids:
                self['_deposit']['external_pids'] = external_pids

            super(Deposit, self).publish()  # publish() already calls commit()
            # Register parent PID if necessary and update redirect
            self.versioning.update_redirect()
            # Reindex previous version. This is needed in order to update
            # the is_last_version flag
            if previous_version_pid is not None:
                self.indexer.index_by_id(previous_version_uuid)

            # save the action for later indexing
            # (``g`` is presumably ``flask.g``; the flag is set only when it
            # is truthy - confirm.)
            if g:
                g.deposit_action = 'publish'
        else:
            super(Deposit, self).commit()
            if g:
                g.deposit_action = 'update-metadata'
        return self
示例#40
0
    def commit(self):
        """Store changes on current instance in database.

        This method extends the default implementation by publishing the
        deposition when 'publication_state' is set to 'published'.

        Steps, in order:

        1. When the record contains an ``external_pids`` key, reconcile the
           objects of the linked bucket with that list (delete objects whose
           key is no longer listed, update changed URIs), call
           ``create_b2safe_file`` and remove the key from the record.
        2. Fail when the record has no model or the model has no JSON.
        3. Set ``open_access`` to ``False`` when the record has an embargo
           date, is marked open access and ``is_under_embargo`` is true.
        4. When a ``community`` is set, run that community's publication
           workflow on the record.
        5. Publish the record if it is entering the published state,
           otherwise perform a plain commit.

        :returns: ``self``.
        :raises MissingModelError: if the model or its JSON is missing.
        :raises InvalidDepositError: if the record's community does not
            exist.
        """
        if 'external_pids' in self:
            deposit_id = self['_deposit']['id']
            # NOTE(review): ``first()`` returns None when nothing matches,
            # which would fail on the attribute access below - confirm a
            # match is guaranteed here.
            recid = PersistentIdentifier.query.filter_by(
                pid_value=deposit_id).first()
            # NOTE(review): 'R' is presumably the "reserved" PID status -
            # confirm. This check disappears when Python runs with -O.
            assert recid.status == 'R'
            # Bucket linked to the record identified by the PID value.
            record_bucket = RecordsBuckets.query.filter_by(
                record_id=recid.pid_value).first()
            bucket = Bucket.query.filter_by(id=record_bucket.bucket_id).first()
            object_versions = ObjectVersion.query.filter_by(
                bucket_id=bucket.id).all()
            # Map each listed file key to its ePIC PID.
            key_to_pid = {
                ext_pid.get('key'): ext_pid.get('ePIC_PID')
                for ext_pid in self['external_pids']
            }
            # for the existing files
            for object_version in object_versions:
                # Skip versions without a file and files whose storage
                # class is not 'B' (presumably the external/B2SAFE class -
                # confirm).
                if object_version.file is None or \
                        object_version.file.storage_class != 'B':
                    continue
                # check that they are still in the file pids list or remove
                if object_version.key not in key_to_pid:
                    ObjectVersion.delete(bucket,
                                         object_version.key)
                # check that the uri is still the same or update it
                elif object_version.file.uri != \
                        key_to_pid[object_version.key]:
                    db.session.query(FileInstance).\
                        filter(FileInstance.id == object_version.file_id).\
                        update({"uri": key_to_pid[object_version.key]})
            create_b2safe_file(self['external_pids'], bucket)
            # The list has been processed; it is not kept on the record.
            del self['external_pids']

        if self.model is None or self.model.json is None:
            raise MissingModelError()

        # automatically make embargoed records private
        if self.get('embargo_date') and self.get('open_access'):
            if is_under_embargo(self):
                self['open_access'] = False

        if 'community' in self:
            try:
                community = Community.get(self['community'])
            except CommunityDoesNotExistError as e:
                raise InvalidDepositError('Community {} does not exist.'.format(
                    self['community'])) from e
            # Look up the workflow callable by the community's setting and
            # apply it to this record.
            workflow = publication_workflows[community.publication_workflow]
            workflow(self.model, self)

        # publish the deposition if needed
        if (self['publication_state'] == PublicationStates.published.name
                # check invenio-deposit status so that we do not loop
                and self['_deposit']['status'] != PublicationStates.published.name):

            # Retrieve previous version in order to reindex it later.
            previous_version_pid = None
            # Save the previous "last" version for later use
            # (``previous_version_uuid`` is only bound inside this branch;
            # it is read below only when ``previous_version_pid`` is set.)
            if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                    self.versioning.has_children:
                previous_version_pid = self.versioning.last_child
                previous_version_uuid = str(RecordUUIDProvider.get(
                    previous_version_pid.pid_value
                ).pid.object_uuid)
            external_pids = generate_external_pids(self)
            if external_pids:
                self['_deposit']['external_pids'] = external_pids

            super(Deposit, self).publish()  # publish() already calls commit()
            # Register parent PID if necessary and update redirect
            self.versioning.update_redirect()
            # Reindex previous version. This is needed in order to update
            # the is_last_version flag
            if previous_version_pid is not None:
                self.indexer.index_by_id(previous_version_uuid)

            # save the action for later indexing
            # (``g`` is presumably ``flask.g``; the flag is set only when it
            # is truthy - confirm.)
            if g:
                g.deposit_action = 'publish'
        else:
            super(Deposit, self).commit()
            if g:
                g.deposit_action = 'update-metadata'
        return self
示例#41
0
文件: cli.py 项目: lnielsen/zenodo
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    The file key is the base name of ``fp.name``. If an object with that key
    already exists in the record's bucket, the command stops unless
    ``replace_existing`` is set, in which case the existing object is
    deleted before the new one is created. The operator is shown a summary
    and must confirm before anything is changed.

    :param recid: Record identifier handed to ``record_resolver.resolve``.
    :param fp: Open, seekable file object with a ``name`` attribute.
    :param replace_existing: Whether an existing object with the same key
        may be replaced.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    exists = obj is not None
    if exists and not replace_existing:
        refusal = (
            u'File with key "{key}" already exists.'
            u' Use `--replace-existing/-f` to overwrite it.'
        ).format(key=key, recid=recid)
        click.echo(click.style(refusal, fg='red'))
        return

    # Measure the stream by seeking to its end, then rewind to the start.
    fp.seek(SEEK_SET, SEEK_END)
    size = fp.tell()
    fp.seek(SEEK_SET)

    click.echo(u'Will add the following file:\n')
    new_file_summary = (
        u'  key: "{key}"\n'
        u'  bucket: {bucket}\n'
        u'  size: {size}\n'
        u''
    ).format(key=key, bucket=bucket.id, size=size)
    click.echo(click.style(new_file_summary, fg='green'))

    click.echo(u'to record:\n')
    record_summary = (
        u'  Title: "{title}"\n'
        u'  RECID: {recid}\n'
        u'  UUID: {uuid}\n'
        u''
    ).format(recid=record['recid'], title=record['title'], uuid=record.id)
    click.echo(click.style(record_summary, fg='green'))

    if replace_existing and exists:
        click.echo(u'and remove the file:\n')
        old_file_summary = (
            u'  key: "{key}"\n'
            u'  bucket: {bucket}\n'
            u'  size: {size}\n'
            u''
        ).format(key=obj.key, bucket=obj.bucket, size=obj.file.size)
        click.echo(click.style(old_file_summary, fg='green'))

    # Bail out early when the operator declines.
    if not click.confirm(u'Continue?'):
        click.echo(click.style(u'File addition aborted.', fg='green'))
        return

    # The bucket is unlocked only while its contents are being changed.
    bucket.locked = False
    if exists and replace_existing:
        ObjectVersion.delete(bucket, obj.key)
    ObjectVersion.create(bucket, key, stream=fp, size=size)
    bucket.locked = True

    record.files.flush()
    record.commit()
    db.session.commit()
    click.echo(click.style(u'File added successfully.', fg='green'))
示例#42
0
 def _clean_file(cls, frame):
     """Delete the object whose version id is ``frame['version_id']``.

     The object version is looked up by version id (exactly one match is
     required) and ``ObjectVersion.delete`` is called with its bucket and
     key.
     """
     version_id = frame['version_id']
     matches = ObjectVersion.query.filter_by(version_id=version_id)
     target = matches.one()
     ObjectVersion.delete(bucket=target.bucket, key=target.key)
示例#43
0
    def set_metadata(
        self,
        source: typing.Optional[typing.Union[BytesReader, dict]],
        metadata_class: typing.Type[Metadata],
        content_type: typing.Optional[str] = None,
        derived_from: typing.Optional[str] = None,
        replace: bool = True,
    ) -> typing.Optional[Metadata]:
        """Store, append to, or remove this record's metadata of one format.

        :param source: Metadata document as a byte stream or a dict; a dict
            is serialized to UTF-8 encoded JSON first. ``None`` requests
            removal of the stored metadata (only acted on when ``replace``
            is true).
        :param metadata_class: Metadata implementation used to parse the
            document; it also supplies the default content type, the
            metadata format used for tagging and the file name.
        :param content_type: Content type of ``source``, optionally with a
            ``charset`` option. Defaults to ``metadata_class.content_type``.
        :param derived_from: When given, recorded on the new object under
            the ``DerivedFrom`` tag.
        :param replace: When true, the new document supersedes the stored
            one. When false and a stored document exists, the new metadata
            is combined with it using ``+``.
        :returns: The resulting metadata object, or ``None`` when ``source``
            is ``None``.
        :raises Conflict: if combining stored and new metadata raises
            ``TypeError``.
        """
        if isinstance(source, dict):
            source = io.BytesIO(json.dumps(source).encode("utf-8"))

        if not content_type:
            content_type = metadata_class.content_type

        # First head object in this record's bucket that has a file and is
        # tagged with this class's metadata format, or None.
        existing_metadata_object = (ObjectVersion.query.join(
            ObjectVersion.tags).filter(
                ObjectVersion.is_head == true(),
                ObjectVersion.file_id.isnot(None),
                ObjectVersion.bucket == self.bucket,
                ObjectVersionTag.key == ObjectTagKey.MetadataFormat.value,
                ObjectVersionTag.value == metadata_class.metadata_format,
            ).first())

        if source is None:
            # No new document: with ``replace`` this is a removal request.
            if replace and existing_metadata_object:
                ObjectVersion.delete(
                    bucket=existing_metadata_object.bucket,
                    key=existing_metadata_object.key,
                )

            # Drop the record-level copy only if it came from this format.
            if replace and (self.get("swordMetadataSourceFormat")
                            == metadata_class.metadata_format):
                self.pop("swordMetadata", None)
                self.pop("swordMetadataSourceFormat", None)

            return None
        else:
            content_type, content_type_options = parse_options_header(
                content_type)

            # Pass an encoding on only when a charset option was supplied
            # as a string.
            encoding = content_type_options.get("charset")
            if isinstance(encoding, str):
                metadata = metadata_class.from_document(
                    source,
                    content_type=content_type,
                    encoding=encoding,
                )
            else:
                metadata = metadata_class.from_document(
                    source,
                    content_type=content_type,
                )

            # Append mode: parse the stored document (using the class's own
            # content type) and combine it with the new metadata.
            if existing_metadata_object and not replace:
                with existing_metadata_object.file.storage().open(
                ) as existing_metadata_f:
                    existing_metadata = metadata_class.from_document(
                        existing_metadata_f,
                        content_type=metadata_class.content_type,
                    )
                try:
                    metadata = existing_metadata + metadata
                except TypeError:
                    raise Conflict(
                        "Existing or new metadata is of wrong type for appending. Reconcile client-side and PUT instead"
                    )

            metadata_filename = self.metadata_key_prefix + metadata_class.filename

            # Refresh the record-level SWORD metadata when the new metadata
            # is SWORD metadata, when the record has none yet, or when the
            # stored copy came from this same format.
            # NOTE(review): the last branch subscripts
            # "swordMetadataSourceFormat" directly, so it presumably relies
            # on that key always accompanying "swordMetadata" - confirm.
            if (isinstance(metadata, SWORDMetadata)
                    or "swordMetadata" not in self
                    or (not isinstance(metadata, SWORDMetadata)
                        and self["swordMetadataSourceFormat"]
                        == metadata_class.metadata_format)):
                metadata.update_record_metadata(self)
                self["swordMetadata"] = metadata.to_sword_metadata()
                self[
                    "swordMetadataSourceFormat"] = metadata_class.metadata_format

            # Store the serialized metadata as a new object version.
            object_version = ObjectVersion.create(
                bucket=self.bucket,
                key=metadata_filename,
                stream=io.BytesIO(bytes(metadata)),
            )

            # Tag the object so the lookup above can find it later.
            tags = TagManager(object_version)
            tags[ObjectTagKey.MetadataFormat] = metadata_class.metadata_format
            if derived_from:
                tags[ObjectTagKey.DerivedFrom] = derived_from

            return metadata