def add_file(recid, fp, replace_existing):
    """Add a new file to a published record.

    :param recid: record identifier of the published record.
    :param fp: open file object to upload; its ``name`` attribute provides
        the object key.
    :param replace_existing: if True, an existing object with the same key
        is deleted and replaced.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    # The object key is the basename of the uploaded file's path.
    key = os.path.basename(fp.name)
    obj = ObjectVersion.get(bucket, key)
    if obj is not None and not replace_existing:
        click.echo(
            click.style(
                u'File with key "{key}" already exists.'
                u' Use `--replace-existing/-f` to overwrite it.'.format(
                    key=key, recid=recid), fg='red'))
        return
    # Determine the upload size by seeking to the end, then rewind so the
    # stream can be consumed from the start.
    fp.seek(SEEK_SET, SEEK_END)
    size = fp.tell()
    fp.seek(SEEK_SET)
    click.echo(u'Will add the following file:\n')
    click.echo(
        click.style(u' key: "{key}"\n'
                    u' bucket: {bucket}\n'
                    u' size: {size}\n'
                    u''.format(key=key, bucket=bucket.id, size=size),
                    fg='green'))
    click.echo(u'to record:\n')
    click.echo(
        click.style(u' Title: "{title}"\n'
                    u' RECID: {recid}\n'
                    u' UUID: {uuid}\n'
                    u''.format(recid=record['recid'], title=record['title'],
                               uuid=record.id), fg='green'))
    if replace_existing and obj is not None:
        click.echo(u'and remove the file:\n')
        click.echo(
            click.style(u' key: "{key}"\n'
                        u' bucket: {bucket}\n'
                        u' size: {size}\n'
                        u''.format(key=obj.key, bucket=obj.bucket,
                                   size=obj.file.size), fg='green'))
    if click.confirm(u'Continue?'):
        # Published record buckets are locked; unlock temporarily while
        # mutating, then re-lock.
        bucket.locked = False
        if obj is not None and replace_existing:
            ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, key, stream=fp, size=size)
        bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File added successfully.', fg='green'))
    else:
        click.echo(click.style(u'File addition aborted.', fg='green'))
def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    """Create and publish a record (plus versions) from a JSON file.

    :param path: path to the JSON record template.
    :param rec_uuid: UUID to use for the first record.
    :param indexer: record indexer used after each publish.
    :param versions: half the number of extra versions to create (the loop
        runs ``2 * versions`` times).
    :param verbose: verbosity level; > 0 echoes progress.
    :returns: tuple ``(record, deposit)`` of the *first* published version.
    """
    with open(path) as record_file:
        record_str = record_file.read()
    # Resolve placeholder identifiers in the raw template before parsing.
    record_str = resolve_community_id(record_str)
    record_str = resolve_block_schema_id(record_str)
    json_data = json.loads(record_str)
    b2share_deposit_uuid_minter(rec_uuid, data=json_data)
    deposit = Deposit.create(json_data, id_=rec_uuid)
    ObjectVersion.create(deposit.files.bucket, 'myfile',
                         stream=BytesIO(b'mycontent'))
    deposit.publish()
    pid, record = deposit.fetch_published()
    indexer.index(record)
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))
    # Chain new versions off the previously published PID.
    last_id = pid.pid_value
    for i in range(2*versions):
        rec_uuid = uuid4()
        json_data = json.loads(record_str)
        b2share_deposit_uuid_minter(rec_uuid, data=json_data)
        deposit2 = Deposit.create(json_data, id_=rec_uuid,
                                  version_of=last_id)
        ObjectVersion.create(deposit2.files.bucket,
                             'myfile-ver{}'.format(i),
                             stream=BytesIO(b'mycontent'))
        deposit2.publish()
        pid, record2 = deposit2.fetch_published()
        indexer.index(record2)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(rec_uuid)))
    return record, deposit
def test_object_restore(app, db, dummy_location):
    """Restore object."""
    first_file = FileInstance(uri="f1", size=1, checksum="mychecksum")
    second_file = FileInstance(uri="f2", size=2, checksum="mychecksum2")
    db.session.add_all([first_file, second_file])

    bucket = Bucket.create()
    first_version = ObjectVersion.create(bucket, "test").set_file(first_file)
    ObjectVersion.create(bucket, "test").set_file(second_file)
    delete_marker = ObjectVersion.delete(bucket, "test")
    db.session.commit()

    assert ObjectVersion.query.count() == 3

    # A delete marker cannot itself be restored.
    pytest.raises(InvalidOperationError, delete_marker.restore)

    # Restoring the first version produces a fresh head version sharing
    # key, file and bucket with the restored one.
    restored = first_version.restore()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert restored.is_head is True
    assert restored.version_id != first_version.version_id
    assert restored.key == first_version.key
    assert restored.file_id == first_version.file_id
    assert restored.bucket == first_version.bucket
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    head = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Copy the head object's data into a brand-new FileInstance.
    replacement = FileInstance.create()
    replacement.copy_contents(head.file, default_location=bucket.location.uri)
    db.session.commit()

    original = head.file
    assert ObjectVersion.query.filter_by(file_id=original.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=replacement.id).count() == 0

    # Point every object version at the replacement file.
    ObjectVersion.relink_all(head.file, replacement)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=original.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=replacement.id).count() == 2
def test_b2share_storage_with_pid(base_app, app, tmp_location, login_user,
                                  test_users):
    """Check that the storage class will redirect pid files.

    A FileInstance whose URI is a handle PID (storage class 'B') should be
    served as an HTTP 302 redirect to that PID rather than streamed.
    """
    pid = 'http://hdl.handle.net/11304/74c66f0b-f814-4202-9dcb-4889ba9b1047'
    with app.app_context():
        # Disable access control for this test
        tmp_location = Location.query.first()
        with db.session.begin_nested():
            bucket = Bucket.create(tmp_location, storage_class='B')
            pid_file = FileInstance.create()
            # size=1, checksum=0: placeholder values; the data lives behind
            # the external PID, not in local storage.
            pid_file.set_uri(pid, 1, 0, storage_class='B')
            ObjectVersion.create(bucket, 'test.txt', pid_file.id)
        db.session.commit()
        url = url_for('invenio_files_rest.object_api',
                      bucket_id=bucket.id,
                      key='test.txt')
    try:
        with app.app_context():
            # Swap in a permissive permission factory; restored in finally.
            permission = current_files_rest.permission_factory
            current_files_rest.permission_factory = allow_all
            # Check that accessing the file redirects to the PID
            with app.test_client() as client:
                resp = client.get(url)
                assert resp.headers['Location'] == pid
                assert resp.status_code == 302
    finally:
        with app.app_context():
            current_files_rest.permission_factory = permission
def test_video_dumps(db, api_project, video):
    """Test video dump, in particular file dump.

    Builds one master video object with a tagged subformat and ten tagged
    frame images, then checks the dump groups and orders them correctly.
    """
    (project, video_1, video_2) = api_project
    bucket_id = video_1['_buckets']['deposit']
    # Master video file.
    obj = ObjectVersion.create(bucket=bucket_id, key='master.mp4',
                               stream=open(video, 'rb'))
    # One subformat pointing back at the master via tags.
    slave_1 = ObjectVersion.create(bucket=bucket_id, key='slave_1.mp4',
                                   stream=open(video, 'rb'))
    ObjectVersionTag.create(slave_1, 'master', str(obj.version_id))
    ObjectVersionTag.create(slave_1, 'media_type', 'video')
    ObjectVersionTag.create(slave_1, 'context_type', 'subformat')
    # Ten frames, created in *reverse* order to verify the dump sorts them.
    for i in reversed(range(10)):
        slave = ObjectVersion.create(bucket=bucket_id,
                                     key='frame-{0}.jpeg'.format(i),
                                     stream=BytesIO(b'\x00' * 1024))
        ObjectVersionTag.create(slave, 'master', str(obj.version_id))
        ObjectVersionTag.create(slave, 'media_type', 'image')
        ObjectVersionTag.create(slave, 'context_type', 'frame')
    db.session.commit()
    files = video_1.files.dumps()
    assert len(files) == 1
    files = files[0]  # only one master file
    # Frames must come back sorted by index despite reversed creation.
    assert 'frame' in files
    assert [f['key'] for f in files['frame']
            ] == ['frame-{}.jpeg'.format(i) for i in range(10)]
    assert 'subformat' in files
    assert len(files['subformat']) == 1
def publish(self):
    """Publish GitHub release as record.

    Creates a deposit from the release metadata, downloads each release
    file from GitHub into the deposit bucket, publishes it with a
    GitHub-specific SIP agent, and schedules DOI registration.  On any
    failure the transaction is rolled back and the (never-committed)
    deposit is removed from the search index.
    """
    id_ = uuid.uuid4()
    deposit = None
    try:
        db.session.begin_nested()
        deposit = self.deposit_class.create(self.metadata, id_=id_)
        deposit['_deposit']['created_by'] = self.event.user_id
        deposit['_deposit']['owners'] = [self.event.user_id]

        # Fetch the deposit files
        for key, url in self.files:
            # Make a HEAD request to get GitHub to compute the
            # Content-Length.
            res = self.gh.api.session.head(url, allow_redirects=True)
            # Now, download the file
            res = self.gh.api.session.get(url, stream=True)
            if res.status_code != 200:
                raise Exception(
                    "Could not retrieve archive from GitHub: {url}".format(
                        url=url))

            size = int(res.headers.get('Content-Length', 0))
            ObjectVersion.create(
                bucket=deposit.files.bucket,
                key=key,
                # Stream the response body directly into the bucket.
                stream=res.raw,
                # A zero/missing Content-Length becomes None (unknown size).
                size=size or None,
                mimetype=res.headers.get('Content-Type'),
            )

        # GitHub-specific SIP store agent
        sip_agent = {
            '$schema': current_jsonschemas.path_to_url(
                current_app.config['SIPSTORE_GITHUB_AGENT_JSONSCHEMA']),
            'user_id': self.event.user_id,
            'github_id': self.release['author']['id'],
            'email': self.gh.account.user.email,
        }
        deposit.publish(user_id=self.event.user_id, sip_agent=sip_agent)
        self.model.recordmetadata = deposit.model
        db.session.commit()

        # Send Datacite DOI registration task
        recid_pid, record = deposit.fetch_published()
        datacite_register.delay(recid_pid.pid_value, str(record.id))
    except Exception:
        db.session.rollback()
        # Remove deposit from index since it was not commited.
        if deposit and deposit.id:
            try:
                RecordIndexer().delete(deposit)
            except Exception:
                # Best-effort cleanup: log and continue to re-raise the
                # original error.
                current_app.logger.exception(
                    "Failed to remove uncommited deposit from index.")
        raise
def create_file_in_bucket(bucket_id):
    """Route to upload file or files to provided bucket.

    Args:
        bucket_id (str): Bucket identifier.

    Returns:
        flask.Response with content serialization as JSON.
    """
    # BUG FIX: ``request.files.keys()`` returns a view, not an iterator, so
    # ``next(...)`` on it raises TypeError; wrap it in ``iter()``.  Also
    # default to None so a request without files is a harmless no-op
    # instead of raising StopIteration (a 500).
    files_key = next(iter(request.files.keys()), None)
    if files_key is None:
        return jsonify({})
    for file_storage in request.files.getlist(files_key):
        with db.session.begin_nested():
            bucket = db.session.query(Bucket).filter(
                Bucket.id == bucket_id).first()
            assert bucket is not None
            ObjectVersion.create(bucket, file_storage.filename,
                                 stream=file_storage.stream)
    db.session.commit()
    return jsonify({})
def test_record_publish_adds_no_handles_for_external_files(
        app, records_data_with_external_pids, test_records_data):
    """Test that no handle PIDs are created for external files.

    Files listed in ``external_pids`` arrive with a handle already; only
    regular uploaded files should be minted a (fake) ePIC PID on publish.
    """
    for metadata in test_records_data:
        with app.app_context():
            # FAKE_EPIC_PID makes publish mint predictable fake handles.
            app.config.update({'FAKE_EPIC_PID': True})
            external_pids = records_data_with_external_pids['external_pids']
            external_dict = {x['key']: x['ePIC_PID'] for x in external_pids}
            data = deepcopy(metadata)
            data['external_pids'] = deepcopy(external_pids)
            record_uuid = uuid.uuid4()
            b2share_deposit_uuid_minter(record_uuid, data=data)
            deposit = Deposit.create(data, id_=record_uuid)
            # Two regular (non-external) files that *should* get PIDs.
            ObjectVersion.create(deposit.files.bucket, 'real_file_1.txt',
                                 stream=BytesIO(b'mycontent'))
            ObjectVersion.create(deposit.files.bucket, 'real_file_2.txt',
                                 stream=BytesIO(b'mycontent'))
            deposit.submit()
            deposit.publish()
            deposit.commit()
            _, record = deposit.fetch_published()
            # external files don't get a handle PID, they already have one
            # which is stored in record['_deposit']['external_pids']
            for f in record.files:
                if f['key'] in external_dict:
                    assert f.get('ePIC_PID') is None
                else:
                    assert '0000' in f['ePIC_PID']  # is a new fake PID
def persist_file_content(record: CernSearchRecord, file_content: str, filename: str):
    """Persist file's extracted content in bucket on filesystem and database.

    :param record: record whose content bucket receives the data.
    :param file_content: extracted text to store (UTF-8 encoded on write).
    :param filename: object key under which the content is stored.
    """
    # FIX: interpolate the filename into the log line — the previous
    # f-string contained no placeholder for it.
    current_app.logger.debug(f"Persist file: {filename} in record {record.id}")
    bucket_content = record.files_content.bucket
    ObjectVersion.create(bucket_content, filename, stream=BytesIO(file_content.encode()))
    db.session.commit()
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it."""
    cfg = current_app.config
    logos_bucket = Bucket.query.get(cfg['COMMUNITIES_BUCKET_UUID'])
    logo_max_size = cfg['COMMUNITIES_LOGO_MAX_SIZE']

    # Extension without the leading dot.
    extension = os.path.splitext(logo_filename)[1]
    if extension.startswith('.'):
        extension = extension[1:]

    # Measure the stream by seeking to its end.
    logo_stream.seek(SEEK_SET, SEEK_END)
    logo_size = logo_stream.tell()
    if logo_size > logo_max_size:
        return None

    if extension not in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        return None

    logo_stream.seek(0)  # Rewind the stream to the beginning
    key = "{0}/logo.{1}".format(community_id, extension)
    ObjectVersion.create(logos_bucket, key, stream=logo_stream,
                         size=logo_size)
    return extension
def _create_record_from_filepath(path, rec_uuid, indexer, versions, verbose):
    """Create and publish a record plus ``2 * versions`` newer versions."""
    with open(path) as record_file:
        record_str = record_file.read()
    # Resolve placeholders before parsing the JSON template.
    record_str = resolve_block_schema_id(resolve_community_id(record_str))
    json_data = json.loads(record_str)
    b2share_deposit_uuid_minter(rec_uuid, data=json_data)
    deposit = Deposit.create(json_data, id_=rec_uuid)
    ObjectVersion.create(deposit.files.bucket, 'myfile',
                         stream=BytesIO(b'mycontent'))
    deposit.publish()
    pid, record = deposit.fetch_published()
    indexer.index(record)
    if verbose > 0:
        click.secho('created new record: {}'.format(str(rec_uuid)))

    last_id = pid.pid_value
    for version_index in range(2 * versions):
        rec_uuid = uuid4()
        json_data = json.loads(record_str)
        b2share_deposit_uuid_minter(rec_uuid, data=json_data)
        new_deposit = Deposit.create(json_data, id_=rec_uuid,
                                     version_of=last_id)
        ObjectVersion.create(new_deposit.files.bucket,
                             'myfile-ver{}'.format(version_index),
                             stream=BytesIO(b'mycontent'))
        new_deposit.publish()
        pid, new_record = new_deposit.fetch_published()
        indexer.index(new_record)
        last_id = pid.pid_value
        if verbose > 0:
            click.secho('created new version: {}'.format(str(rec_uuid)))
    return record, deposit
def rename_file(recid, key, new_key):
    """Rename a file in a published record.

    :param recid: record identifier.
    :param key: current object key.
    :param new_key: new object key; must not already exist in the bucket.
    """
    pid, record = record_resolver.resolve(recid)
    bucket = record.files.bucket
    obj = ObjectVersion.get(bucket, key)
    if obj is None:
        click.echo(click.style(u'File with key "{key}" not found.'.format(
            key=key), fg='red'))
        return
    new_obj = ObjectVersion.get(bucket, new_key)
    if new_obj is not None:
        click.echo(click.style(u'File with key "{key}" already exists.'.format(
            key=new_key), fg='red'))
        return
    if click.confirm(u'Rename "{key}" to "{new_key}" on bucket {bucket}.'
                     u' Continue?'.format(
                         key=obj.key, new_key=new_key, bucket=bucket.id)):
        # Published buckets are locked; unlock while mutating, re-lock after.
        record.files.bucket.locked = False

        # Rename = delete the old key and recreate the new key pointing at
        # the same FileInstance (no file data is copied).
        file_id = obj.file.id
        ObjectVersion.delete(bucket, obj.key)
        ObjectVersion.create(bucket, new_key, _file_id=file_id)
        record.files.bucket.locked = True
        record.files.flush()
        record.commit()
        db.session.commit()
        click.echo(click.style(u'File renamed successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file rename.', fg='green'))
def test_object_relink_all(app, db, dummy_location):
    """Test relinking files."""
    bucket = Bucket.create()
    head = ObjectVersion.create(
        bucket, "relink-test", stream=BytesIO(b('relinkthis')))
    ObjectVersion.create(bucket, "do-not-touch", stream=BytesIO(b('na')))
    bucket.snapshot()
    db.session.commit()

    assert ObjectVersion.query.count() == 4
    assert FileInstance.query.count() == 2

    # Clone the head object's data into a new FileInstance.
    clone = FileInstance.create()
    clone.copy_contents(head.file, location=bucket.location)
    db.session.commit()

    previous = head.file
    assert ObjectVersion.query.filter_by(file_id=previous.id).count() == 2
    assert ObjectVersion.query.filter_by(file_id=clone.id).count() == 0

    # Repoint all versions from the old file to the clone.
    ObjectVersion.relink_all(head.file, clone)
    db.session.commit()

    assert ObjectVersion.query.filter_by(file_id=previous.id).count() == 0
    assert ObjectVersion.query.filter_by(file_id=clone.id).count() == 2
def datasets(skip_files):
    """Load demo datasets records.

    Reads every ``*.json`` fixture under ``modules/fixtures/data/datasets``,
    mints an identifier, creates the record plus a bucket, and registers
    each listed file as an ObjectVersion (unless ``skip_files``).
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:
        click.echo('Loading datasets from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # NOTE(review): this rebinds the outer loop variable
                    # `filename` (the fixture path) to the file basename —
                    # harmless here since the path is only echoed above,
                    # but worth renaming.
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def data_policies(skip_files):
    """Load demo Data Policy records.

    Reads every ``*.json`` fixture under ``modules/fixtures/data``, mints a
    recid, creates the record plus a bucket, and registers each listed file
    as an ObjectVersion (unless ``skip_files``).
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    # NOTE(review): RecordMetadata is imported but unused below.
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:
        click.echo('Loading data-policies from {0} ...'.format(filename))
        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    # NOTE(review): rebinds the outer loop variable
                    # `filename` to the file basename — harmless here but
                    # worth renaming.
                    filename = file.get("uri").split('/')[-1:][0]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def test_object_create(app, db, dummy_location):
    """Test object creation.

    Exercises versioning semantics: successive creates with the same key
    chain versions (only the newest is head), and a version without a
    location acts as a delete marker.
    """
    with db.session.begin_nested():
        b = Bucket.create()

        # Create one object version
        obj1 = ObjectVersion.create(b, "test")
        assert obj1.bucket_id == b.id
        assert obj1.key == 'test'
        assert obj1.version_id
        assert obj1.file_id is None
        assert obj1.is_head is True
        assert obj1.bucket == b

        # Set fake location.
        obj1.set_location("file:///tmp/obj1", 1, "checksum")

        # Create one object version for same object key
        obj2 = ObjectVersion.create(b, "test")
        assert obj2.bucket_id == b.id
        assert obj2.key == 'test'
        assert obj2.version_id != obj1.version_id
        assert obj2.file_id is None
        assert obj2.is_head is True
        assert obj2.bucket == b

        # Set fake location
        obj2.set_location("file:///tmp/obj2", 2, "checksum")

        # Create a new object version for a different object with no location.
        # I.e. it is considered a delete marker.
        obj3 = ObjectVersion.create(b, "deleted_obj")

    # Object __repr__
    assert str(obj1) == \
        "{0}:{1}:{2}".format(obj1.bucket_id, obj1.version_id, obj1.key)

    # Sanity check
    assert ObjectVersion.query.count() == 3

    # Assert that obj2 is the head version
    obj = ObjectVersion.get(b.id, "test", version_id=obj1.version_id)
    assert obj.version_id == obj1.version_id
    assert obj.is_head is False
    obj = ObjectVersion.get(b.id, "test", version_id=obj2.version_id)
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that getting latest version gets obj2
    obj = ObjectVersion.get(b.id, "test")
    assert obj.version_id == obj2.version_id
    assert obj.is_head is True

    # Assert that obj3 is not retrievable (without specifying version id).
    assert ObjectVersion.get(b.id, "deleted_obj") is None
    # Assert that obj3 *is* retrievable (when specifying version id).
    assert \
        ObjectVersion.get(b.id, "deleted_obj", version_id=obj3.version_id) == \
        obj3
def objects(db, bucket):
    """File system location."""
    license_obj = ObjectVersion.create(
        bucket, 'LICENSE', stream=BytesIO(b('license file')))
    readme_obj = ObjectVersion.create(
        bucket, 'README.rst', stream=BytesIO(b('readme file')))
    db.session.commit()
    yield [license_obj, readme_obj]
def attach_file(file_id, pid_type1, pid_value1, key1, pid_type2, pid_value2,
                key2):
    """Attach a file to a record or deposit.

    You must provide the information which will determine the first file,
    i.e.: either 'file-id' OR 'pid-type1', 'pid-value1' and 'key1'.
    Additionally you need to specify the information on the target
    record/deposit, i.e.: 'pid-type2', 'pid-value2' and 'key2'.
    """
    # Either a direct file id or a full (pid_type1, pid_value1, key1)
    # triple must identify the source; the target triple is mandatory.
    assert ((file_id or (pid_type1 and pid_value1 and key1)) and
            (pid_type2 and pid_value2 and key2))
    msg = u"PID type must be 'recid' or 'depid'."
    if pid_type1:
        assert pid_type1 in ('recid', 'depid', ), msg
    assert pid_type2 in ('recid', 'depid', ), msg

    # Resolve the source file id from (pid_type1, pid_value1, key1) when it
    # was not given directly.
    if not file_id:
        resolver = record_resolver if pid_type1 == 'recid' \
            else deposit_resolver
        pid1, record1 = resolver.resolve(pid_value1)
        bucket1 = record1.files.bucket
        obj1 = ObjectVersion.get(bucket1, key1)
        if obj1 is None:
            click.echo(click.style(u'File with key "{key}" not found.'.format(
                key=key1), fg='red'))
            return
        file_id = obj1.file.id

    resolver = record_resolver if pid_type2 == 'recid' else deposit_resolver
    pid2, record2 = resolver.resolve(pid_value2)
    bucket2 = record2.files.bucket
    obj2 = ObjectVersion.get(bucket2, key2)
    if obj2 is not None:
        click.echo(click.style(u'File with key "{key}" already exists on'
                               u' bucket {bucket}.'.format(
                                   key=key2, bucket=bucket2.id), fg='red'))
        return
    if click.confirm(u'Attaching file "{file_id}" to bucket {bucket2}'
                     u' as "{key2}". Continue?'.format(
                         file_id=file_id, key2=key2, bucket2=bucket2.id)):
        record2.files.bucket.locked = False
        ObjectVersion.create(bucket2, key2, _file_id=file_id)
        # Only published-record buckets are re-locked; deposit buckets
        # stay unlocked for further edits.
        if pid_type2 == 'recid':
            record2.files.bucket.locked = True
        record2.files.flush()
        record2.commit()
        db.session.commit()
        click.echo(click.style(u'File attached successfully.', fg='green'))
    else:
        click.echo(click.style(u'Aborted file attaching.', fg='green'))
def test_object_create_with_fileid(app, db, dummy_location):
    """Test object creation."""
    with db.session.begin_nested():
        bucket = Bucket.create()
        # First version carries 4 bytes of data.
        first = ObjectVersion.create(bucket, 'test', stream=BytesIO(b'test'))
        assert bucket.size == 4
        # Second version reuses the same file; bucket size doubles.
        ObjectVersion.create(bucket, 'test', _file_id=first.file)
        assert bucket.size == 8
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri.

    :param external_pids: list of ``{'ePIC_PID': ..., 'key': ...}`` dicts
        describing externally stored files.
    :param bucket: bucket in which the external files are registered.
    :raises InvalidDepositError: on duplicate keys, keys starting with a
        slash, or when a file URI already exists.
    """
    validate_schema(
        external_pids,
        {
            'type': 'array',
            'items': {
                'type': 'object',
                'properties': {
                    'ePIC_PID': {
                        'type': 'string'
                    },
                    'key': {
                        'type': 'string'
                    }
                },
                'additionalProperties': False,
                'required': ['ePIC_PID', 'key']
            }
        })

    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError([
            FieldError('external_pids',
                       'Field external_pids contains duplicate keys.')
        ])
    for external_pid in external_pids:
        # Normalize bare handles to full handle.net URLs.
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError([
                FieldError('external_pids',
                           'File key cannot start with a "/".')
            ])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                # size=1, checksum=0: placeholders; the real data lives
                # behind the external PID (storage class 'B').
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'
            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError as e:
            # FIX: chain the original IntegrityError so the root cause is
            # preserved (it was previously caught and silently discarded).
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File URI already exists.')]) from e
def test_object_set_contents(app, db, dummy_location):
    """Test object set contents.

    Covers initial upload, the no-overwrite guarantee, versioned re-upload,
    and checksum verification (success, mismatch, missing file, throws=False).
    """
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        assert obj.file_id is None
        assert FileInstance.query.count() == 0

        # Save a file.
        with open('LICENSE', 'rb') as fp:
            obj.set_contents(fp)

    # Assert size, location and checksum
    assert obj.file_id is not None
    assert obj.file.uri is not None
    assert obj.file.size == getsize('LICENSE')
    assert obj.file.checksum is not None
    assert b1.size == obj.file.size

    # Try to overwrite
    with db.session.begin_nested():
        with open('LICENSE', 'rb') as fp:
            pytest.raises(FileInstanceAlreadySetError, obj.set_contents, fp)

    # Save a new version with different content
    with db.session.begin_nested():
        obj2 = ObjectVersion.create(b1, "LICENSE")
        with open('README.rst', 'rb') as fp:
            obj2.set_contents(fp)

    assert obj2.file_id is not None and obj2.file_id != obj.file_id
    assert obj2.file.size == getsize('README.rst')
    assert obj2.file.uri != obj.file.uri
    assert Bucket.get(b1.id).size == obj.file.size + obj2.file.size

    # A successful verification records a positive last check.
    obj2.file.verify_checksum()
    assert obj2.file.last_check_at
    assert obj2.file.last_check is True

    # Corrupt the stored checksum: verification reports a mismatch.
    old_checksum = obj2.file.checksum
    obj2.file.checksum = "md5:invalid"
    assert obj2.file.verify_checksum() is False

    # A missing file raises by default and leaves last-check state intact.
    previous_last_check = obj2.file.last_check
    previous_last_check_date = obj2.file.last_check_at
    with db.session.begin_nested():
        obj2.file.checksum = old_checksum
        obj2.file.uri = 'invalid'
        pytest.raises(ResourceNotFoundError, obj2.file.verify_checksum)
    assert obj2.file.last_check == previous_last_check
    assert obj2.file.last_check_at == previous_last_check_date

    # With throws=False the failure is swallowed and last_check is reset.
    obj2.file.verify_checksum(throws=False)
    assert obj2.file.last_check is None
    assert obj2.file.last_check_at != previous_last_check_date
def test_deposit_vtt_tags(api_app, db, api_project, users):
    """Test VTT tag generation.

    Publishes a video three times (initial, after replacing the subtitle,
    after adding another one) and checks the subtitle objects are tagged
    with the language taken from the ``test_<lang>.vtt`` filename.
    """
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    # insert a master file inside the video
    add_master_to_video(
        video_deposit=video_1,
        filename='test.mp4',
        stream=BytesIO(b'1234'),
        video_duration="15"
    )
    # try to insert a new vtt object
    obj = ObjectVersion.create(
        video_1._bucket, key="test_fr.vtt", stream=BytesIO(b'hello'))
    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()
    # check tags
    check_object_tags(obj, video_1, content_type='vtt',
                      media_type='subtitle', context_type='subtitle',
                      language='fr')

    # edit the video
    video_1 = video_1.edit()
    # try to delete the old vtt file and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(
        video_1._bucket, key="test_en.vtt", stream=BytesIO(b'hello'))
    # publish again the video
    video_1 = video_1.publish()
    # check tags
    check_object_tags(obj2, video_1, content_type='vtt',
                      media_type='subtitle', context_type='subtitle',
                      language='en')

    # edit a re-published video
    video_1 = video_1.edit()
    # add a new vtt file
    obj3 = ObjectVersion.create(
        video_1._bucket, key="test_it.vtt", stream=BytesIO(b'hello'))
    # publish again the video
    video_1 = video_1.publish()
    # check tags
    check_object_tags(obj3, video_1, content_type='vtt',
                      media_type='subtitle', context_type='subtitle',
                      language='it')
def _create_bucket(deposit, record_json, directory, logfile):
    """Attach downloaded files to the deposit's bucket.

    Files are expected on disk as ``file_<index>`` inside *directory*.
    Each one is checked against the size recorded in the record metadata;
    on mismatch an error is written to *logfile* instead of uploading.
    """
    for index, file_dict in enumerate(record_json.get('files', [])):
        click.secho(' Load file "{}"'.format(file_dict.get('name')))
        filepath = os.path.join(directory, 'file_{}'.format(index))
        if int(os.path.getsize(filepath)) != int(file_dict.get('size')):
            logfile.write("\n********************")
            logfile.write("\nERROR: downloaded file size differs for file {}: {} instead of {}"
                          .format(filepath, os.path.getsize(filepath),
                                  file_dict.get('size')))
            logfile.write("\n********************")
        else:
            # FIX: stream the file object directly instead of loading the
            # whole content into memory via BytesIO(f.read()), and open
            # read-only ('rb') since the file is never written to.
            with open(filepath, 'rb') as f:
                ObjectVersion.create(deposit.files.bucket,
                                     file_dict['name'], stream=f)
def test_bucket_sync_new_object(app, db, dummy_location):
    """Test that a new file in src in synced to dest."""
    src = Bucket.create()
    dest = Bucket.create()
    ObjectVersion.create(src, "filename").set_location(
        "b1v1", 1, "achecksum")
    db.session.commit()

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 0

    # Syncing copies the new object across without touching the source.
    src.sync(dest)

    assert ObjectVersion.get_by_bucket(src).count() == 1
    assert ObjectVersion.get_by_bucket(dest).count() == 1
    assert ObjectVersion.get(dest, "filename")
def test_object_version_tags(app, db, dummy_location):
    """Test object version tags.

    Covers create, duplicate-key rejection, get/get_value, delete,
    create_or_update, dict dump, tag copying and cascade deletion.
    """
    f = FileInstance(uri="f1", size=1, checksum="mychecksum")
    db.session.add(f)
    db.session.commit()
    b = Bucket.create()
    obj1 = ObjectVersion.create(b, "test").set_file(f)
    ObjectVersionTag.create(obj1, "mykey", "testvalue")
    ObjectVersionTag.create(obj1, "another_key", "another value")
    db.session.commit()

    # Duplicate key
    pytest.raises(
        IntegrityError, ObjectVersionTag.create, obj1, "mykey", "newvalue")

    # Test get
    assert ObjectVersionTag.query.count() == 2
    assert ObjectVersionTag.get(obj1, "mykey").value == "testvalue"
    # get_value accepts either the object or its version_id.
    assert ObjectVersionTag.get_value(obj1.version_id, "another_key") \
        == "another value"
    assert ObjectVersionTag.get_value(obj1, "invalid") is None

    # Test delete
    ObjectVersionTag.delete(obj1, "mykey")
    assert ObjectVersionTag.query.count() == 1
    # Deleting a non-existent tag is a no-op.
    ObjectVersionTag.delete(obj1, "invalid")
    assert ObjectVersionTag.query.count() == 1

    # Create or update
    ObjectVersionTag.create_or_update(obj1, "another_key", "newval")
    ObjectVersionTag.create_or_update(obj1.version_id, "newkey", "testval")
    db.session.commit()
    assert ObjectVersionTag.get_value(obj1, "another_key") == "newval"
    assert ObjectVersionTag.get_value(obj1, "newkey") == "testval"

    # Get tags as dictionary
    assert obj1.get_tags() == dict(another_key="newval", newkey="testval")
    obj2 = ObjectVersion.create(b, 'test2')
    assert obj2.get_tags() == dict()

    # Copy object version: tags are duplicated onto the copy.
    obj_copy = obj1.copy()
    db.session.commit()
    assert obj_copy.get_tags() == dict(another_key="newval", newkey="testval")
    assert ObjectVersionTag.query.count() == 4

    # Cascade delete: removing versions removes their tags.
    ObjectVersion.query.delete()
    db.session.commit()
    assert ObjectVersionTag.query.count() == 0
def test_object_mimetype(app, db, dummy_location):
    """Test object set file."""
    bucket = Bucket.create()
    db.session.commit()
    pdf_obj = ObjectVersion.create(bucket, "test.pdf",
                                   stream=BytesIO(b'pdfdata'))
    plain_obj = ObjectVersion.create(bucket, "README",
                                     stream=BytesIO(b'pdfdata'))

    # MIME type is guessed from the object key, not the content.
    assert pdf_obj.mimetype == "application/pdf"
    assert plain_obj.mimetype == "application/octet-stream"

    # Override computed MIME type.
    plain_obj.mimetype = "text/plain"
    db.session.commit()
    assert ObjectVersion.get(bucket, "README").mimetype == "text/plain"
def files():
    """Load files.

    Resets the data directory and all file-related tables, then populates
    three demo buckets: one plain, one exercising multiple versions plus a
    delete marker, and one empty.
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: two versions per key, then a delete marker on the second.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: intentionally left empty.
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def test_deposit_poster_tags(api_app, db, api_project, users):
    """Test poster tag generation.

    Publishes a video with a poster image, then republishes with a
    replacement poster, checking content/context/media-type tags each time.
    """
    project, video_1, video_2 = api_project
    video_1_depid = video_1['_deposit']['id']
    master_video_filename = 'test.mp4'
    poster_filename = 'poster.jpg'
    poster_filename2 = 'poster.png'
    # insert a master file inside the video
    add_master_to_video(video_deposit=video_1,
                        filename=master_video_filename,
                        stream=BytesIO(b'1234'), video_duration='15')
    # insert a new poster object
    obj = ObjectVersion.create(video_1._bucket, key=poster_filename,
                               stream=BytesIO(b'hello'))
    # publish the video
    prepare_videos_for_publish([video_1])
    video_1 = deposit_video_resolver(video_1_depid)
    login_user(User.query.get(users[0]))
    video_1 = video_1.publish()
    # check tags
    check_object_tags(obj, video_1, content_type='jpg',
                      context_type='poster', media_type='image')

    # edit the video
    video_1 = video_1.edit()
    # try to delete the old poster frame and substitute with a new one
    video_1 = deposit_video_resolver(video_1_depid)
    ObjectVersion.delete(bucket=video_1._bucket, key=obj.key)
    obj2 = ObjectVersion.create(video_1._bucket, key=poster_filename2,
                                stream=BytesIO(b'hello'))
    # publish again the video
    video_1 = video_1.publish()
    # check tags
    check_object_tags(obj2, video_1, content_type='png',
                      context_type='poster', media_type='image')
def files():
    """Load files.

    Resets the data directory and the file-storage tables, then creates a
    default location and three demo buckets (two populated, one empty).
    """
    srcroot = dirname(dirname(__file__))
    d = current_app.config['DATADIR']
    if exists(d):
        shutil.rmtree(d)
    makedirs(d)

    # Clear data (children before parents to satisfy FK constraints).
    Part.query.delete()
    MultipartObject.query.delete()
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location.
    loc = Location(name='local', uri=d, default=True)
    # BUG FIX: the new Location was never added to the session, so the
    # commit below persisted nothing and the buckets referenced an
    # unsaved location.
    db.session.add(loc)
    db.session.commit()

    # Bucket 0
    b1 = Bucket.create(loc)
    b1.id = '00000000-0000-0000-0000-000000000000'
    for f in ['README.rst', 'LICENSE']:
        with open(join(srcroot, f), 'rb') as fp:
            ObjectVersion.create(b1, f, stream=fp)

    # Bucket 1: multiple versions per key; last key gets a delete marker.
    b2 = Bucket.create(loc)
    b2.id = '11111111-1111-1111-1111-111111111111'
    k = 'AUTHORS.rst'
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'AUTHORS.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    k = 'RELEASE-NOTES.rst'
    with open(join(srcroot, 'RELEASE-NOTES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    with open(join(srcroot, 'CHANGES.rst'), 'rb') as fp:
        ObjectVersion.create(b2, k, stream=fp)
    ObjectVersion.delete(b2.id, k)

    # Bucket 2: intentionally empty.
    b2 = Bucket.create(loc)
    b2.id = '22222222-2222-2222-2222-222222222222'
    db.session.commit()
def save_and_validate_logo(logo_stream, logo_filename, community_id):
    """Validate if communities logo is in limit size and save it."""
    cfg = current_app.config
    logos_bucket = Bucket.query.get(cfg['COMMUNITIES_BUCKET_UUID'])

    # Extension without the leading dot, e.g. 'png'.
    _, ext = os.path.splitext(logo_filename)
    if ext.startswith('.'):
        ext = ext[1:]

    # Reject any extension not explicitly whitelisted.
    if ext not in cfg['COMMUNITIES_LOGO_EXTENSIONS']:
        return None

    key = "{0}/logo.{1}".format(community_id, ext)
    ObjectVersion.create(logos_bucket, key, stream=logo_stream)
    return ext
def objects(db, bucket):
    """File system location."""
    license_bytes = b('license file')
    license_obj = ObjectVersion.create(
        bucket, 'LICENSE',
        stream=BytesIO(license_bytes),
        size=len(license_bytes),
    )

    readme_bytes = b('readme file')
    readme_obj = ObjectVersion.create(
        bucket, 'README.rst',
        stream=BytesIO(readme_bytes),
        size=len(readme_bytes),
    )

    db.session.commit()
    yield [license_obj, readme_obj]
def save_file(self, content, filename, size, failed=False):
    """Save file with given content in deposit bucket.

    If downloading a content failed, file will be still created,
    with tag `failed`.

    :param content: stream
    :param filename: name that file will be saved with
    :param size: size of content
    :param failed: if failed during downloading the content
    """
    # Register the object version and an (empty) file instance first so the
    # record of the file exists even when the content is missing.
    obj = ObjectVersion.create(bucket=self.files.bucket, key=filename)
    obj.file = FileInstance.create()
    self.files.flush()
    if not failed:
        # Write the actual bytes into the bucket's storage location.
        self.files[filename].file.set_contents(
            content,
            default_location=self.files.bucket.location.uri,
            size=size)
        print('File {} saved ({}b).\n'.format(filename, size))
    else:
        # Mark the empty placeholder so consumers can tell the download
        # failed rather than the file being legitimately empty.
        ObjectVersionTag.create(object_version=obj,
                                key='status', value='failed')
        print('File {} not saved.\n'.format(filename))
    self.files.flush()
    db.session.commit()
    return obj
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    Saves the repository LICENSE through the storage backend and verifies
    the reported checksum, size and on-disk location.
    """
    # Create bucket and object.
    with db.session.begin_nested():
        b = Bucket.create()
        obj = ObjectVersion.create(b, "LICENSE")
        obj.file = FileInstance()
        db.session.add(obj.file)
    storage = PyFilesystemStorage(obj, obj.file)
    with open('LICENSE', 'rb') as fp:
        loc, size, checksum = storage.save(fp)

    # Verify checksum, size and location.
    with open('LICENSE', 'rb') as fp:
        m = hashlib.md5()
        m.update(fp.read())
        assert "md5:{0}".format(m.hexdigest()) == checksum

    # FIX: the size assertion was duplicated verbatim; one copy suffices.
    assert size == getsize('LICENSE')
    assert loc == \
        join(
            dummy_location.uri,
            str(b.id),
            str(obj.version_id),
            "data")
def _copy_file(self, source_record, ov, target_record, file_md,
               source_record_context, target_record_context):
    """Copy one object version into the target record's bucket.

    :param source_record: record the file is copied from
    :param ov: the ``ObjectVersion`` being copied
    :param target_record: record receiving the copy
    :param file_md: file metadata dict; updated in place with the new
        bucket/file/version identifiers
    :returns: ``False`` when a ``file_copied`` receiver vetoes the copy
        (the file is skipped), ``True`` otherwise
    """
    bucket = target_record.bucket
    # Reuse the existing FileInstance instead of duplicating the bytes.
    new_ob = ObjectVersion.create(bucket, ov.key, _file_id=ov.file_id)
    tags = {tag.key: tag.value for tag in ov.tags}
    # Let signal receivers veto or adjust the copy before tags are written.
    for _, res in file_copied.send(
            source_record,
            source_record=source_record,
            target_record=target_record,
            object_version=ov,
            tags=tags,
            metadata=file_md,
            source_record_context=source_record_context,
            target_record_context=target_record_context):
        if res is False:
            return False  # skip this file
    # BUG FIX: iterating a dict yields keys only; the ``key, value``
    # unpacking requires ``.items()`` (previously this raised ValueError
    # for any tag key not exactly two characters long).
    for key, value in tags.items():
        ObjectVersionTag.create_or_update(object_version=new_ob,
                                          key=key, value=value)
    file_md['bucket'] = str(bucket.id)
    file_md['file_id'] = str(new_ob.file_id)
    file_md['version_id'] = str(new_ob.version_id)
    return True
def test_verify_checksum(app, db, dummy_location):
    """Test celery tasks for checksum verification."""
    b1 = Bucket.create()
    with open('README.rst', 'rb') as fp:
        obj = ObjectVersion.create(b1, 'README.rst', stream=fp)
    db.session.commit()
    file_id = obj.file_id

    # Healthy file: verification succeeds and records the check time.
    verify_checksum(str(file_id))
    f = FileInstance.query.get(file_id)
    assert f.last_check_at
    assert f.last_check is True

    # Break the URI so the file can no longer be read back.
    f.uri = 'invalid'
    db.session.add(f)
    db.session.commit()
    # With throws=True the task propagates the storage error and leaves
    # the previous last_check value untouched.
    pytest.raises(ResourceNotFoundError, verify_checksum, str(file_id),
                  throws=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is True

    # With throws=False the failure is swallowed and last_check is reset
    # to None ("unknown").
    verify_checksum(str(file_id), throws=False)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None

    # Pessimistic mode: the error still propagates, and last_check is
    # reset to None even though it was True before the run.
    f.last_check = True
    db.session.add(f)
    db.session.commit()
    with pytest.raises(ResourceNotFoundError):
        verify_checksum(str(file_id), pessimistic=True)
    f = FileInstance.query.get(file_id)
    assert f.last_check is None
def handle_record_files(data, bucket, files, skip_files):
    """Handles record files.

    :param data: record metadata dict (``recid`` is used in error output)
    :param bucket: bucket the files are attached to
    :param files: list of dicts with 'uri', 'size' and 'checksum'; each is
        updated in place with the created bucket/key/version information
    :param skip_files: when truthy, do nothing
    """
    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        # BUG FIX: derive the filename before the try block; previously a
        # failure in FileInstance.create() made the except handler crash
        # with NameError because ``filename`` was still unbound.
        filename = file.get("uri").split('/')[-1]
        try:
            f = FileInstance.create()
            f.set_uri(file.get("uri"), file.get(
                "size"), file.get("checksum"))
            obj = ObjectVersion.create(
                bucket,
                filename,
                _file_id=f.id
            )
            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })
        except Exception as e:
            click.echo(
                'Recid {0} file {1} could not be loaded due '
                'to {2}.'.format(data.get('recid'), filename, str(e)))
            continue
def test_SIP_files(db):
    """Test the files methods of API SIP."""
    # Create the underlying SIP model and wrap it in the API class.
    sip_model = SIP_.create()
    db.session.commit()
    api_sip = SIP(sip_model)
    assert len(api_sip.files) == 0

    # Set up a temporary default storage location.
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()

    # Create a file object in a fresh bucket.
    content = b'test lol\n'
    bucket = Bucket.create()
    obj = ObjectVersion.create(bucket, 'test.txt', stream=BytesIO(content))
    db.session.commit()

    # Attach it to the SIP and check both API and model views agree.
    sip_file = api_sip.attach_file(obj)
    db.session.commit()
    assert len(api_sip.files) == 1
    assert api_sip.files[0].filepath == 'test.txt'
    assert sip_model.sip_files[0].filepath == 'test.txt'

    # Clean up the temporary storage.
    rmtree(tmppath)
def test_pyfilesystemstorage(app, db, dummy_location):
    """Test pyfs storage.

    Saves an in-memory stream and verifies the checksum, size, progress
    callback and on-disk location.
    """
    # Create bucket and object.
    with db.session.begin_nested():
        b1 = Bucket.create()
        obj = ObjectVersion.create(b1, "LICENSE")
        obj.file = FileInstance.create()
    storage = PyFilesystemStorage(obj.file, base_uri=obj.bucket.location.uri)
    counter = dict(size=0)

    def callback(total, size):
        counter['size'] = size

    data = b("this is some content")
    stream = BytesIO(data)
    loc, size, checksum = storage.save(stream, progress_callback=callback)

    # Verify checksum, size and location.
    m = hashlib.md5()
    m.update(data)
    assert "md5:{0}".format(m.hexdigest()) == checksum
    assert size == len(data)
    # FIX: the progress callback was wired up but its effect was never
    # checked; assert it actually reported the full size.
    assert counter['size'] == size
    assert loc == join(
        dummy_location.uri,
        str(obj.file.id),
        "data")
def get_local_file(bucket, datadir, filename):
    """Create local file as objectversion.

    :param bucket: bucket to store the object in
    :param datadir: directory containing the source file
    :param filename: name of the source file inside ``datadir``
    :returns: the ``version_id`` of the created object version
    """
    # BUG FIX: the stream was opened but never closed, leaking the file
    # handle; use a context manager instead.
    with open(join(datadir, filename), 'rb') as stream:
        object_version = ObjectVersion.create(bucket, "test.mp4",
                                              stream=stream)
    version_id = object_version.version_id
    db.session.commit()
    return version_id
def __setitem__(self, key, stream):
    """Add file inside a deposit."""
    with db.session.begin_nested():
        # Store the stream as a new object version, then record its
        # serialized form in the deposit's files map.
        new_version = ObjectVersion.create(
            bucket=self.bucket, key=key, stream=stream)
        self.filesmap[key] = self.file_cls(new_version, {}).dumps()
        self.flush()
def open(self):
    """Open the bucket for writing."""
    # self.key may be either a plain value or a zero-argument callable.
    target_key = self.key() if callable(self.key) else self.key
    self.obj = ObjectVersion.create(self.bucket_id, target_key)
    db.session.commit()
    return self
def create_b2safe_file(external_pids, bucket):
    """Create a FileInstance which contains a PID in its uri.

    :param external_pids: list of dicts, each with exactly the keys
        'ePIC_PID' and 'key' (enforced by the schema below)
    :param bucket: bucket the external files are registered in
    :raises InvalidDepositError: on duplicate keys, keys starting with
        '/', or when a file URI already exists (IntegrityError)
    """
    # Validate shape first: every entry must have both fields and nothing
    # else.
    validate_schema(external_pids, {
        'type': 'array',
        'items': {
            'type': 'object',
            'properties': {
                'ePIC_PID': {'type': 'string'},
                'key': {'type': 'string'}
            },
            'additionalProperties': False,
            'required': ['ePIC_PID', 'key']
        }
    })

    # Duplicate keys would map two PIDs to the same object key.
    keys_list = [e['key'] for e in external_pids]
    keys_set = set(keys_list)
    if len(keys_list) != len(keys_set):
        raise InvalidDepositError(
            [FieldError('external_pids',
                        'Field external_pids contains duplicate keys.')])
    for external_pid in external_pids:
        # Normalize bare handles into full handle.net URLs.
        if not external_pid['ePIC_PID'].startswith('http://hdl.handle.net/'):
            external_pid['ePIC_PID'] = 'http://hdl.handle.net/' + \
                external_pid['ePIC_PID']
        if external_pid['key'].startswith('/'):
            raise InvalidDepositError(
                [FieldError('external_pids',
                            'File key cannot start with a "/".')])
        try:
            # Create the file instance if it does not already exist
            file_instance = FileInstance.get_by_uri(external_pid['ePIC_PID'])
            if file_instance is None:
                file_instance = FileInstance.create()
                # size=1, checksum=0 are placeholders; storage_class 'B'
                # marks the file as external (B2SAFE).
                file_instance.set_uri(
                    external_pid['ePIC_PID'], 1, 0, storage_class='B')
            assert file_instance.storage_class == 'B'

            # Add the file to the bucket if it is not already in it
            current_version = ObjectVersion.get(bucket, external_pid['key'])
            if not current_version or \
                    current_version.file_id != file_instance.id:
                ObjectVersion.create(bucket, external_pid['key'],
                                     file_instance.id)
        except IntegrityError as e:
            # A concurrent insert of the same URI violates the unique
            # constraint; surface it as a deposit validation error.
            raise InvalidDepositError(
                [FieldError('external_pids', 'File URI already exists.')])
def test_object_set_file(app, db, dummy_location):
    """Test object set file."""
    bucket = Bucket.create()
    instance = FileInstance(uri="f1", size=1, checksum="mychecksum")
    version = ObjectVersion.create(bucket, "test").set_file(instance)
    db.session.commit()

    # The file is attached, and attaching a second time must fail.
    assert version.file == instance
    assert pytest.raises(FileInstanceAlreadySetError,
                         version.set_file, instance)
def __setitem__(self, key, stream):
    """Add file inside a deposit."""
    with db.session.begin_nested():
        # Use the declared Content-Length of the upload when available;
        # fall back to None so the storage layer determines the size.
        upload = (request.files.get('file')
                  if request and request.files else None)
        declared_size = (upload.content_length or None) if upload else None
        new_version = ObjectVersion.create(
            bucket=self.bucket, key=key, stream=stream, size=declared_size)
        self.filesmap[key] = self.file_cls(new_version, {}).dumps()
        self.flush()
def __setitem__(self, key, stream):
    """Add file inside a deposit.

    :param key: object key the file is stored under
    :param stream: readable stream with the file content
    """
    with db.session.begin_nested():
        # save the file
        ObjectVersion.create(bucket=self.bucket, key=key, stream=stream)
        # update deposit['_files']
        # BUG FIX: '_files' holds dicts like {'key': ...}; the old test
        # ``key not in self.record['_files']`` compared the string against
        # whole dicts and was always True, appending a duplicate entry on
        # every overwrite. Compare against the stored keys instead.
        existing_keys = [f.get('key') for f in self.record['_files']]
        if key not in existing_keys:
            self.record['_files'].append({'key': key})
def test_object(db, bucket):
    """File system location."""
    payload = b('test object')
    version = ObjectVersion.create(
        bucket,
        'test.txt',
        stream=BytesIO(payload),
        size=len(payload),
    )
    db.session.commit()
    return version
def image_object(database, location, image_path):
    """Get ObjectVersion of test image."""
    image_bucket = Bucket.create()
    database.session.commit()

    # Stream the image file into the bucket as a single object version.
    with open(image_path, 'rb') as image_stream:
        version = ObjectVersion.create(
            image_bucket,
            'test.jpg',
            stream=image_stream,
            size=getsize(image_path),
        )
    database.session.commit()
    return version