def test_get_empty_bucket(db, client, headers, bucket, objects, permissions,
                          get_json):
    """Test getting objects from an empty bucket."""
    # Delete the objects created in the fixtures to have an empty bucket with
    # permissions set up.
    for obj in objects:
        ObjectVersion.delete(obj.bucket_id, obj.key)
    db.session.commit()

    cases = [
        (None, 404),
        ('auth', 404),
        ('objects', 404),  # TODO - return 403 instead
        ('bucket', 200),
        ('location', 200),
    ]

    for user, expected in cases:
        login_user(client, permissions[user])
        resp = client.get(
            url_for('invenio_files_rest.bucket_api', bucket_id=bucket.id),
            headers=headers,
        )
        assert resp.status_code == expected
        if resp.status_code == 200:
            assert get_json(resp)['contents'] == []
def dispose_object_version(object_version):
    """Clean up resources related to an ObjectVersion."""
    if object_version:
        object_version = as_object_version(object_version)
        # remove the object version
        ObjectVersion.delete(bucket=object_version.bucket,
                             key=object_version.key)
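# A minimal usage sketch for the cleanup helper above: wiring it into a pytest
# fixture teardown. The `bucket` fixture, key name, and imports are assumptions
# for illustration, not taken from the source; they mirror the other test
# snippets in this collection.
from io import BytesIO

import pytest
from invenio_db import db
from invenio_files_rest.models import ObjectVersion


@pytest.fixture
def scratch_object(bucket):
    """Create a throw-away object and dispose of it on teardown."""
    obj = ObjectVersion.create(bucket, "scratch.txt", stream=BytesIO(b"tmp"))
    db.session.commit()
    yield obj
    # Soft-deletes the head version via dispose_object_version() above.
    dispose_object_version(obj)
    db.session.commit()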
def test_bucket_sync(app, db, dummy_location):
    """Test that a bucket is correctly synced."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename1").set_location("b1v11", 1, "achecksum")
    ObjectVersion.create(b1, "filename2").set_location("b1v12", 1, "achecksum")
    ObjectVersion.create(b1, "filename3").set_location("b1v13", 1, "achecksum")
    ObjectVersion.create(b2, "extra1").set_location("b2v11", 1, "achecksum")
    db.session.commit()

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 4

    ObjectVersion.delete(b1, "filename1")
    ObjectVersion.create(b2, "extra2").set_location("b2v12", 1, "achecksum")
    ObjectVersion.create(b2, "extra3").set_location("b2v13", 1, "achecksum")
    ObjectVersion.delete(b2, "extra3")
    db.session.commit()

    b1.sync(b2, delete_extras=True)

    assert ObjectVersion.get_by_bucket(b1).count() == 2
    assert ObjectVersion.get_by_bucket(b2).count() == 2
def rename_file(recid, key, new_key):
    """Rename a file in a published record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return

    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return

    if click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                     u' Continue?'.format(
                         key=obj.key, new_key=new_key, bucket=bucket.id)):
        record.files.bucket.locked = False

        file_id = obj.file.id
        ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, new_key, _file_id=file_id)
        record.files.bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File renamed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file rename.', fg='green'))
def add_file(recid, fp, replace_existing):
    """Add a new file to a published record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    key = os.path.basename(fp.name)

    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(click.style(
            u'File with key "{key}" already exists.'
            u' Use `--replace-existing/-f` to overwrite it.'.format(
                key=key, recid=recid), fg='red'))
        return

    fp.seek(SEEK_SET, SEEK_END)
    size = fp.tell()
    fp.seek(SEEK_SET)

    click.echo(u'Will add the following file:\n')
    click.echo(click.style(
        u' key: "{key}"\n'
        u' bucket: {bucket}\n'
        u' size: {size}\n'
        u''.format(key=key, bucket=bucket.id, size=size), fg='green'))
    click.echo(u'to record:\n')
    click.echo(click.style(
        u' Title: "{title}"\n'
        u' RECID: {recid}\n'
        u' UUID: {uuid}\n'
        u''.format(recid=record['recid'], title=record['title'],
                   uuid=record.id), fg='green'))

    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(click.style(
            u' key: "{key}"\n'
            u' bucket: {bucket}\n'
            u' size: {size}\n'
            u''.format(key=obj.key, bucket=obj.bucket,
                       size=obj.file.size), fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))
def test_sync_records_with_deposits(app, db, location, users,
                                    project_deposit_metadata,
                                    video_deposit_metadata):
    """Test sync records with deposits task."""
    # create a project
    project = Project.create(project_deposit_metadata)
    project_deposit_metadata['report_number'] = ['123']
    # create new video
    video_deposit_metadata['_project_id'] = project['_deposit']['id']
    deposit = Video.create(video_deposit_metadata)
    depid = deposit['_deposit']['id']

    # insert objects inside the deposit
    ObjectVersion.create(
        deposit.files.bucket, "obj_1"
    ).set_location("mylocation1", 1, "mychecksum1")
    ObjectVersion.create(
        deposit.files.bucket, "obj_2"
    ).set_location("mylocation2", 1, "mychecksum2")
    ObjectVersion.create(
        deposit.files.bucket, "obj_3"
    ).set_location("mylocation3", 1, "mychecksum3")
    obj_4 = ObjectVersion.create(
        deposit.files.bucket, "obj_4"
    ).set_location("mylocation4", 1, "mychecksum4")

    # publish
    login_user(User.query.get(users[0]))
    prepare_videos_for_publish([deposit])
    deposit = deposit.publish()
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # add a new object
    ObjectVersion.create(
        deposit.files.bucket, "obj_new"
    ).set_location("mylocation_new", 1, "mychecksum")
    # add a second version of "obj_new"
    ObjectVersion.create(
        deposit.files.bucket, "obj_new"
    ).set_location("mylocation2.1", 1, "mychecksum2.1")
    # delete obj_3
    ObjectVersion.delete(deposit.files.bucket, "obj_3")
    # remove obj_4
    obj_4.remove()

    # check video and record
    files = ['obj_1', 'obj_2', 'obj_3', 'obj_4']
    edited_files = ['obj_1', 'obj_2', 'obj_3', 'obj_new']
    check_deposit_record_files(deposit, edited_files, record, files)

    # try to sync deposit and record
    sync_records_with_deposit_files.s(deposit_id=depid).apply_async()

    # get deposit and record
    deposit = deposit_video_resolver(depid)
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True

    # check that record and deposit are in sync
    re_edited_files = edited_files + ['obj_4']
    check_deposit_record_files(deposit, edited_files, record, re_edited_files)
def test_deposit_prepare_edit(api_app, db, location, project_deposit_metadata):
    """Test deposit prepare edit."""
    # create new deposit
    project_deposit_metadata['report_number'] = ['123']
    deposit = CDSDeposit.create(project_deposit_metadata,
                                bucket_location=location.name)
    assert deposit.is_published() is False
    assert deposit.has_record() is False

    # insert objects inside the deposit
    ObjectVersion.create(
        deposit.files.bucket, "obj_1"
    ).set_location("mylocation1", 1, "mychecksum1")
    ObjectVersion.create(
        deposit.files.bucket, "obj_2"
    ).set_location("mylocation2", 1, "mychecksum2")
    ObjectVersion.create(
        deposit.files.bucket, "obj_3"
    ).set_location("mylocation3", 1, "mychecksum3")
    obj_4 = ObjectVersion.create(
        deposit.files.bucket, "obj_4"
    ).set_location("mylocation4", 1, "mychecksum4")

    # publish
    deposit = deposit.publish()
    _, record = deposit.fetch_published()
    assert deposit.is_published() is True
    assert deposit.has_record() is True

    # check record bucket is locked
    files = ['obj_1', 'obj_2', 'obj_3', 'obj_4']
    check_deposit_record_files(deposit, files, record, files)

    # add a new object
    ObjectVersion.create(
        deposit.files.bucket, "obj_new"
    ).set_location("mylocation_new", 1, "mychecksum")
    # add a second version of "obj_new"
    ObjectVersion.create(
        deposit.files.bucket, "obj_new"
    ).set_location("mylocation2.1", 1, "mychecksum2.1")
    # delete obj_3
    ObjectVersion.delete(deposit.files.bucket, "obj_3")
    # remove obj_4
    obj_4.remove()

    # check deposit and record
    edited_files = ['obj_1', 'obj_2', 'obj_3', 'obj_new']
    check_deposit_record_files(deposit, edited_files, record, files)

    # edit
    deposit = deposit.edit()
    assert deposit.is_published() is False
    assert deposit.has_record() is True

    # check the situation
    check_deposit_record_files(deposit, edited_files, record, files)

    # publish again
    deposit = deposit.publish()
    assert deposit.is_published() is True
    assert deposit.has_record() is True

    # check that record and deposit are in sync
    re_edited_files = edited_files + ['obj_4']
    check_deposit_record_files(deposit, edited_files, record, re_edited_files)
def delete_object_version(obj: ObjectVersion):
    """Delete the file on the filesystem and soft-delete it in the database."""
    current_app.logger.debug(f"Delete Object Version: {str(obj)}")
    # Soft delete the object version (writes a delete marker)
    ObjectVersion.delete(obj.bucket, obj.key)
    delete_file_instance(obj)
    db.session.commit()
def test_deposit_vtt_tags(api_app, db, api_project, users):
    """Test VTT tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']

    # insert a master file inside the video
    add_master_to_video(
        video_deposit=video_1,
        filename='test.mp4',
        stream=BytesIO(b'1234'),
        video_duration="15"
    )
    # try to insert a new vtt object
    obj = ObjectVersion.create(
        video_1._bucket, key="test_fr.vtt", stream=BytesIO(b'hello'))

    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='fr')

    # edit the video
    video_1 = video_1.edit()

    # try to delete the old vtt file and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(
        video_1._bucket, key="test_en.vtt", stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj2, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='en')

    # edit a re-published video
    video_1 = video_1.edit()

    # add a new vtt file
    obj3 = ObjectVersion.create(
        video_1._bucket, key="test_it.vtt", stream=BytesIO(b'hello'))

    # publish again the video
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj3, video_1, content_type='vtt', media_type='subtitle',
                      context_type='subtitle', language='it')
def object_version(db, bucket):
    """Object version fixture."""
    content = b'some content'
    obj = ObjectVersion.create(
        bucket, 'test.pdf', stream=BytesIO(content), size=len(content))
    db.session.commit()
    yield obj
    ObjectVersion.delete(bucket, obj.key)
    db.session.commit()
def rename(self, old_key, new_key):
    """Rename a file."""
    assert new_key not in self
    file_ = self[old_key]

    # create a new version with the new name
    obj = ObjectVersion.create(
        bucket=self.bucket, key=new_key, _file_id=file_.obj.file_id)
    self.record['_files'][self.keys.index(old_key)]['key'] = new_key
    # delete the old version
    ObjectVersion.delete(bucket=self.bucket, key=old_key)

    return obj
def files():
    """Load files."""
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test poster tag generation."""
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'

    # insert a master file inside the video
    add_master_to_video(video_deposit=video_1,
                        filename=master_video_filename,
                        stream=BytesIO(b'1234'), video_duration='15')
    # insert a new poster object
    obj = ObjectVersion.create(video_1._bucket, key=poster_filename,
                               stream=BytesIO(b'hello'))

    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj, video_1, content_type='jpg', context_type='poster',
                      media_type='image')

    # edit the video
    video_1 = video_1.edit()

    # delete the old poster frame and substitute it with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(video_1._bucket, key=poster_filename2,
                                stream=BytesIO(b'hello'))

    # publish the video again
    video_1 = video_1.publish()

    # check tags
    check_object_tags(obj2, video_1, content_type='png', context_type='poster',
                      media_type='image')
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    f1 = FileInstance(uri="f1", size=1, checksum="mychecksum")
    f2 = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add(f1)
    db.session.add(f2)
    b1 = Bucket.create()

    obj1 = ObjectVersion.create(b1, "test").set_file(f1)
    ObjectVersion.create(b1, "test").set_file(f2)
    obj_deleted = ObjectVersion.delete(b1, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3

    # Cannot restore a deleted version.
    pytest.raises(InvalidOperationError, obj_deleted.restore)

    # Restore first version
    obj_new = obj1.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert obj_new.is_head is True
    assert obj_new.version_id != obj1.version_id
    assert obj_new.key == obj1.key
    assert obj_new.file_id == obj1.file_id
    assert obj_new.bucket == obj1.bucket
def test_object_delete(app, db, dummy_location):
    """Test object deletion."""
    # Create three versions, with the latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert obj.is_deleted
    assert obj.file_id is None

    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def test_object_delete(app, db, dummy_location):
    """Test object deletion."""
    # Create three versions, with the latest being a delete marker.
    with db.session.begin_nested():
        b1 = Bucket.create()
        ObjectVersion.create(b1, "test").set_location(
            "b1test1", 1, "achecksum")
        ObjectVersion.create(b1, "test").set_location(
            "b1test2", 1, "achecksum")
        obj_deleted = ObjectVersion.delete(b1, "test")

    assert ObjectVersion.query.count() == 3
    assert ObjectVersion.get(b1, "test") is None
    assert ObjectVersion.get_by_bucket(b1).count() == 0

    obj = ObjectVersion.get(b1, "test", version_id=obj_deleted.version_id)
    assert obj.deleted
    assert obj.file_id is None

    ObjectVersion.create(b1, "test").set_location(
        "b1test4", 1, "achecksum")

    assert ObjectVersion.query.count() == 4
    assert ObjectVersion.get(b1.id, "test") is not None
    assert ObjectVersion.get_by_bucket(b1.id).count() == 1
def __delitem__(self, key):
    """Delete a file from the deposit."""
    obj = ObjectVersion.delete(bucket=self.bucket, key=key)
    self.record['_files'] = [
        file_ for file_ in self.record['_files'] if file_['key'] != key
    ]
    if obj is None:
        raise KeyError(key)
def sync_buckets(src_bucket, dest_bucket, delete_extras=False):
    """Sync source bucket ObjectVersions to the destination bucket.

    The source bucket is fully mirrored into the destination bucket following
    this logic:

    * identical ObjectVersions are not touched
    * new ObjectVersions are added to the destination
    * deleted ObjectVersions are deleted in the destination
    * extra ObjectVersions in the destination are deleted if the
      ``delete_extras`` param is True

    :param src_bucket: Source bucket.
    :param dest_bucket: Destination bucket.
    :param delete_extras: Delete extra ObjectVersions in destination if True.
    :returns: The destination bucket with an exact copy of the ObjectVersions
        in ``src_bucket``.
    """
    assert not dest_bucket.locked

    src_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == src_bucket.id,
        ObjectVersion.is_head.is_(True)
    ).all()
    dest_ovs = ObjectVersion.query.filter(
        ObjectVersion.bucket_id == dest_bucket.id,
        ObjectVersion.is_head.is_(True)
    ).all()

    # transform into a dict {key: object version}
    src_keys = {ov.key: ov for ov in src_ovs}
    dest_keys = {ov.key: ov for ov in dest_ovs}

    for key, ov in src_keys.items():
        if not ov.deleted:
            if key not in dest_keys or \
                    ov.file_id != dest_keys[key].file_id:
                ov.copy(bucket=dest_bucket)
        elif key in dest_keys and not dest_keys[key].deleted:
            ObjectVersion.delete(dest_bucket, key)

    if delete_extras:
        for key, ov in dest_keys.items():
            if key not in src_keys:
                ObjectVersion.delete(dest_bucket, key)

    return dest_bucket
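# A minimal usage sketch for sync_buckets(), assuming a Flask application
# context with invenio-db set up and a default Location configured (as the
# dummy_location fixtures in the tests above provide). Bucket contents and
# key names are illustrative assumptions, not taken from the source.
from io import BytesIO

from invenio_db import db
from invenio_files_rest.models import Bucket, ObjectVersion

src = Bucket.create()
dest = Bucket.create()
ObjectVersion.create(src, "data.txt", stream=BytesIO(b"payload"))
ObjectVersion.create(dest, "stale.txt", stream=BytesIO(b"old"))
db.session.commit()

# Mirror src into dest and soft-delete objects that only exist in dest.
sync_buckets(src, dest, delete_extras=True)
db.session.commit()

assert ObjectVersion.get(dest, "data.txt") is not None
assert ObjectVersion.get(dest, "stale.txt") is None  # head is a delete marker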
def delete(self, key):
    """Delete a file."""
    rf = self[key]
    ov = rf.object_version

    # Delete the entire row
    rf.delete(force=True)
    if ov:
        # TODO: Should we also remove the FileInstance? Configurable?
        ObjectVersion.delete(ov.bucket, key)
    del self._entries[key]

    # Unset the default preview if the file is removed
    if self.default_preview == key:
        self.default_preview = None
    if key in self._order:
        self._order.remove(key)

    return rf
def delete_file(bucket_id):
    key = ''
    deleted_file = ObjectVersion.delete(bucket_id, key)
    if deleted_file:
        return jsonify({"status": "ok"})
    return jsonify({"error": "not found"}), 404
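# The view above hard-codes an empty key. A purely illustrative variant (the
# route, blueprint name, and explicit commit are assumptions, not from the
# source) would take the key from the URL and persist the delete marker:
from flask import Blueprint, jsonify
from invenio_db import db
from invenio_files_rest.models import ObjectVersion

blueprint = Blueprint("files_cleanup", __name__)


@blueprint.route("/buckets/<bucket_id>/<path:key>", methods=["DELETE"])
def delete_file_by_key(bucket_id, key):
    """Soft-delete an object by writing a delete marker for its key."""
    deleted_file = ObjectVersion.delete(bucket_id, key)
    if deleted_file:
        db.session.commit()
        return jsonify({"status": "ok"})
    return jsonify({"error": "not found"}), 404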
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    # check that for the 'undeleted' key there is only one HEAD
    heads = [
        o for o in ObjectVersion.query.filter_by(
            bucket_id=b1.id, key='undeleted').all()
        if o.is_head
    ]
    assert len(heads) == 1
    assert heads[0].file.uri == 'b1u2'

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def __delitem__(self, key):
    """Delete a file from the deposit."""
    obj = ObjectVersion.delete(bucket=self.bucket, key=key)

    if obj is None:
        raise KeyError(key)

    if key in self.filesmap:
        del self.filesmap[key]
        self.flush()
def test_object_remove_marker(app, db, bucket, objects):
    """Test object remove."""
    obj = objects[0]

    assert ObjectVersion.query.count() == 4
    obj = ObjectVersion.delete(obj.bucket, obj.key)
    db.session.commit()
    assert ObjectVersion.query.count() == 5

    obj = ObjectVersion.get(obj.bucket, obj.key, version_id=obj.version_id)
    obj.remove()
    assert ObjectVersion.query.count() == 4
def remove_file(recid, key=None, index=None):
    """Remove a file from a published record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key, recid=recid), fg='red'))
        return

    click.echo(u'Will remove the following file:\n')
    click.echo(click.style(
        u' key: "{key}"\n'
        u' {checksum}\n'
        u' bucket: {bucket}\n'
        u''.format(
            key=key.decode('utf-8'),
            checksum=obj.file.checksum,
            bucket=bucket.id), fg='green'))
    click.echo('from record:\n')
    click.echo(click.style(
        u' Title: "{title}"\n'
        u' RECID: {recid}\n'
        u' UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id), fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        bucket.size -= obj.file.size
        ObjectVersion.delete(bucket, obj.key)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File removed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file removal.', fg='green'))
def delete(self, key, remove_obj=True, softdelete_obj=False):
    """Delete a file."""
    rf = self[key]
    ov = rf.object_version

    # Delete the entire row
    rf.delete(force=True)
    if ov:
        if remove_obj:
            # Hard-delete the object version
            rf.object_version.remove()
        elif softdelete_obj:
            # Soft-delete by writing a delete marker
            ObjectVersion.delete(
                rf.object_version.bucket, rf.object_version.key)
    del self._entries[key]

    # Unset the default preview if the file is removed
    if self.default_preview == key:
        self.default_preview = None
    if key in self._order:
        self._order.remove(key)

    return rf
def remove_file(recid, key=None, index=None):
    """Remove a file from a published record."""
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket

    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key, recid=recid), fg='red'))
        return

    click.echo(u'Will remove the following file:\n')
    click.echo(click.style(
        u' key: "{key}"\n'
        u' {checksum}\n'
        u' bucket: {bucket}\n'
        u''.format(
            key=key,
            checksum=obj.file.checksum,
            bucket=bucket.id), fg='green'))
    click.echo('from record:\n')
    click.echo(click.style(
        u' Title: "{title}"\n'
        u' RECID: {recid}\n'
        u' UUID: {uuid}\n'
        u''.format(
            recid=record['recid'],
            title=record['title'],
            uuid=record.id), fg='green'))

    if click.confirm(u'Continue?'):
        bucket.locked = False
        ObjectVersion.delete(bucket, obj.key)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File removed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file removal.', fg='green'))
def test_bucket_sync_deleted_object(app, db, dummy_location):
    """Test that a deleted object in src is deleted in dest."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "filename").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b2, "filename").set_location("b2v1", 1, "achecksum")
    ObjectVersion.create(b2, "extra-deleted").set_location("b3v1", 1, "asum")
    ObjectVersion.delete(b1, "filename")
    db.session.commit()

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 0
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get(b2, "extra-deleted")

    ObjectVersion.delete(b2, "extra-deleted")
    db.session.commit()

    b1.sync(b2)

    assert ObjectVersion.get_by_bucket(b1).count() == 0
    assert ObjectVersion.get_by_bucket(b2).count() == 0
def test_object_snapshot(app, db, dummy_location):
    """Test snapshot creation."""
    b1 = Bucket.create()
    b2 = Bucket.create()
    ObjectVersion.create(b1, "versioned").set_location("b1v1", 1, "achecksum")
    ObjectVersion.create(b1, "versioned").set_location("b1v2", 1, "achecksum")
    ObjectVersion.create(b1, "deleted").set_location("b1d1", 1, "achecksum")
    ObjectVersion.delete(b1, "deleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u1", 1, "achecksum")
    ObjectVersion.delete(b1, "undeleted")
    ObjectVersion.create(b1, "undeleted").set_location("b1u2", 1, "achecksum")
    ObjectVersion.create(b1, "simple").set_location("b1s1", 1, "achecksum")
    ObjectVersion.create(b2, "another").set_location("b2a1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.query.count() == 9
    assert FileInstance.query.count() == 7
    assert Bucket.query.count() == 2
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1

    b3 = b1.snapshot(lock=True)
    db.session.commit()

    # Must be locked as requested.
    assert b1.locked is False
    assert b3.locked is True

    assert Bucket.query.count() == 3
    assert ObjectVersion.query.count() == 12
    assert FileInstance.query.count() == 7
    assert ObjectVersion.get_by_bucket(b1).count() == 3
    assert ObjectVersion.get_by_bucket(b2).count() == 1
    assert ObjectVersion.get_by_bucket(b3).count() == 3
    assert ObjectVersion.get_by_bucket(b1, versions=True).count() == 8
    assert ObjectVersion.get_by_bucket(b3, versions=True).count() == 3
def test_dereference_when_not_head(api, location, es,
                                   httpserver: pytest_httpserver.HTTPServer):
    with api.test_request_context():
        record = SWORDDeposit.create({})
        object_version = ObjectVersion.create(
            bucket=record.bucket, key="some-file.txt",
            stream=io.BytesIO(b"data"))
        TagManager(object_version).update({
            ObjectTagKey.ByReferenceURL: httpserver.url_for("some-file.txt"),
            ObjectTagKey.Packaging: PackagingFormat.SimpleZip,
        })
        # This makes the object version we already had a non-head one
        ObjectVersion.delete(record.bucket, object_version.key)

        httpserver.expect_request("/some-file.txt").respond_with_data(b"data")

        db.session.refresh(object_version)
        result = tasks.dereference_object(record.id, object_version.version_id)

        assert result == ["some-file.txt"]
        assert httpserver.log == []
def commit(self):
    """Store changes of the current instance in the database.

    This method extends the default implementation by publishing the
    deposition when 'publication_state' is set to 'published'.
    """
    if 'external_pids' in self:
        deposit_id = self['_deposit']['id']
        recid = PersistentIdentifier.query.filter_by(
            pid_value=deposit_id).first()
        assert recid.status == 'R'
        record_bucket = RecordsBuckets.query.filter_by(
            record_id=recid.pid_value).first()
        bucket = Bucket.query.filter_by(id=record_bucket.bucket_id).first()
        object_versions = ObjectVersion.query.filter_by(
            bucket_id=bucket.id).all()
        key_to_pid = {
            ext_pid.get('key'): ext_pid.get('ePIC_PID')
            for ext_pid in self['external_pids']
        }
        # for the existing files
        for object_version in object_versions:
            if object_version.file is None or \
                    object_version.file.storage_class != 'B':
                continue
            # check that they are still in the file pids list or remove
            if object_version.key not in key_to_pid:
                ObjectVersion.delete(bucket, object_version.key)
            # check that the uri is still the same or update it
            elif object_version.file.uri != \
                    key_to_pid[object_version.key]:
                db.session.query(FileInstance).\
                    filter(FileInstance.id == object_version.file_id).\
                    update({"uri": key_to_pid[object_version.key]})
        create_b2safe_file(self['external_pids'], bucket)
        del self['external_pids']

    if self.model is None or self.model.json is None:
        raise MissingModelError()

    # automatically make embargoed records private
    if self.get('embargo_date') and self.get('open_access'):
        if is_under_embargo(self):
            self['open_access'] = False

    if 'community' in self:
        try:
            community = Community.get(self['community'])
        except CommunityDoesNotExistError as e:
            raise InvalidDepositError(
                'Community {} does not exist.'.format(
                    self['community'])) from e
        workflow = publication_workflows[community.publication_workflow]
        workflow(self.model, self)

    # publish the deposition if needed
    if (self['publication_state'] == PublicationStates.published.name
            # check invenio-deposit status so that we do not loop
            and self['_deposit']['status'] !=
            PublicationStates.published.name):
        # Retrieve previous version in order to reindex it later.
        previous_version_pid = None
        # Save the previous "last" version for later use
        if self.versioning.parent.status == PIDStatus.REDIRECTED and \
                self.versioning.has_children:
            previous_version_pid = self.versioning.last_child
            previous_version_uuid = str(RecordUUIDProvider.get(
                previous_version_pid.pid_value
            ).pid.object_uuid)
        external_pids = generate_external_pids(self)
        if external_pids:
            self['_deposit']['external_pids'] = external_pids
        super(Deposit, self).publish()  # publish() already calls commit()
        # Register parent PID if necessary and update redirect
        self.versioning.update_redirect()
        # Reindex previous version. This is needed in order to update
        # the is_last_version flag
        if previous_version_pid is not None:
            self.indexer.index_by_id(previous_version_uuid)
        # save the action for later indexing
        if g:
            g.deposit_action = 'publish'
    else:
        super(Deposit, self).commit()
        if g:
            g.deposit_action = 'update-metadata'
    return self
def _clean_file(cls, frame):
    """Clean object and file."""
    obj = ObjectVersion.query.filter_by(
        version_id=frame['version_id']).one()
    ObjectVersion.delete(bucket=obj.bucket, key=obj.key)
def set_metadata(
    self,
    source: typing.Optional[typing.Union[BytesReader, dict]],
    metadata_class: typing.Type[Metadata],
    content_type: str = None,
    derived_from: str = None,
    replace: bool = True,
) -> typing.Optional[Metadata]:
    if isinstance(source, dict):
        source = io.BytesIO(json.dumps(source).encode("utf-8"))

    if not content_type:
        content_type = metadata_class.content_type

    existing_metadata_object = (
        ObjectVersion.query.join(ObjectVersion.tags)
        .filter(
            ObjectVersion.is_head == true(),
            ObjectVersion.file_id.isnot(None),
            ObjectVersion.bucket == self.bucket,
            ObjectVersionTag.key == ObjectTagKey.MetadataFormat.value,
            ObjectVersionTag.value == metadata_class.metadata_format,
        )
        .first()
    )

    if source is None:
        if replace and existing_metadata_object:
            ObjectVersion.delete(
                bucket=existing_metadata_object.bucket,
                key=existing_metadata_object.key,
            )
        if replace and (
            self.get("swordMetadataSourceFormat")
            == metadata_class.metadata_format
        ):
            self.pop("swordMetadata", None)
            self.pop("swordMetadataSourceFormat", None)
        return None
    else:
        content_type, content_type_options = parse_options_header(
            content_type)
        encoding = content_type_options.get("charset")

        if isinstance(encoding, str):
            metadata = metadata_class.from_document(
                source,
                content_type=content_type,
                encoding=encoding,
            )
        else:
            metadata = metadata_class.from_document(
                source,
                content_type=content_type,
            )

        if existing_metadata_object and not replace:
            with existing_metadata_object.file.storage().open() \
                    as existing_metadata_f:
                existing_metadata = metadata_class.from_document(
                    existing_metadata_f,
                    content_type=metadata_class.content_type,
                )
            try:
                metadata = existing_metadata + metadata
            except TypeError:
                raise Conflict(
                    "Existing or new metadata is of wrong type for appending."
                    " Reconcile client-side and PUT instead"
                )

        metadata_filename = self.metadata_key_prefix + metadata_class.filename

        if (
            isinstance(metadata, SWORDMetadata)
            or "swordMetadata" not in self
            or (
                not isinstance(metadata, SWORDMetadata)
                and self["swordMetadataSourceFormat"]
                == metadata_class.metadata_format
            )
        ):
            metadata.update_record_metadata(self)
            self["swordMetadata"] = metadata.to_sword_metadata()
            self["swordMetadataSourceFormat"] = metadata_class.metadata_format

        object_version = ObjectVersion.create(
            bucket=self.bucket,
            key=metadata_filename,
            stream=io.BytesIO(bytes(metadata)),
        )
        tags = TagManager(object_version)
        tags[ObjectTagKey.MetadataFormat] = metadata_class.metadata_format
        if derived_from:
            tags[ObjectTagKey.DerivedFrom] = derived_from

        return metadata