def test_sip_model(db):
    """Check SIP creation, its user relation and the errors it raises."""
    owner = create_test_user('*****@*****.**')

    # Valid agent JSON
    valid_agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # Invalid agent JSON: the values have the wrong JSON types
    malformed_agent = {
        'email': ['should', 'not', 'be', 'a', 'list'],
        'ip_address': {'definitely': 'not', 'a': 'dict'},
    }
    # Agent JSON that points to a wrong schema
    wrong_schema_agent = {
        'email': '*****@*****.**',
        'ip_address': '1.1.1.1',
        '$schema': 'http://incorrect/agent/schema.json',
    }

    first_sip = SIP.create(user_id=owner.id, agent=valid_agent)
    assert first_sip.user == owner

    # Two more SIPs: one without any argument, one with the same user/agent.
    SIP.create()
    SIP.create(user_id=owner.id, agent=valid_agent)
    assert SIP.query.count() == 3

    with pytest.raises(ValidationError):
        SIP.create(agent=malformed_agent)
    with pytest.raises(SIPUserDoesNotExist):
        SIP.create(user_id=5)
    with pytest.raises(JSONSchemaNotFound):
        SIP.create(agent=wrong_schema_agent)

    db.session.commit()
def test_SIP_files(db):
    """Test the files methods of API SIP.

    A file is stored in a temporary default location, attached to the API
    SIP, and must then be visible both through the API object and through
    the underlying model.
    """
    # we create a SIP model
    sip = SIP_.create()
    db.session.commit()
    # We create an API SIP on top of it
    api_sip = SIP(sip)
    assert len(api_sip.files) == 0

    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()
        # we create a file
        content = b'test lol\n'
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, 'test.txt',
                                   stream=BytesIO(content))
        db.session.commit()
        # we attach it to the SIP
        api_sip.attach_file(obj)
        db.session.commit()
        assert len(api_sip.files) == 1
        assert api_sip.files[0].filepath == 'test.txt'
        assert sip.sip_files[0].filepath == 'test.txt'
    finally:
        # Always remove the temporary storage, even when an assertion or a
        # database call above fails; otherwise the directory is leaked.
        rmtree(tmppath)
def test_Archive_get_realstatus_transfer(db, client, oauth2):
    """Check the archive GET endpoint when the real status is requested.

    ``requests.get`` is patched to answer ``SIP_PROCESSING``; the endpoint
    is expected to report ``PROCESSING_TRANSFER``.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.WAITING
    db.session.commit()

    fake_reply = Response()
    fake_reply.status_code = 200
    payload = json.dumps({'status': 'SIP_PROCESSING'})
    fake_reply._content = payload.encode('utf-8')

    with patch('requests.get', return_value=fake_reply):
        endpoint = url_for('invenio_archivematica_api.archive_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
        response = client.get(endpoint,
                              data=json.dumps({'realStatus': True}),
                              content_type='application/json')

    assert response.status_code == 200
    result = json.loads(response.data.decode('utf-8'))
    expected = [
        ('sip_id', str(sip.id)),
        ('status', 'PROCESSING_TRANSFER'),
        ('accession_id', 'id'),
        ('archivematica_id', str(archive.archivematica_id)),
    ]
    for key, value in expected:
        assert key in result
        assert result[key] == value
def create(cls, pid, record, create_sip_files=True, user_id=None, agent=None):
    """Create a Zenodo SIP from a PID and its record.

    Besides the SIP itself, a ``RecordSIP`` linking the SIP to the PID is
    created, as well as one ``SIPFile`` per file of the record (when
    requested). These extra objects are not returned; they are reachable
    through the SIP relationships 'record_sips' and 'sip_files'.

    :param pid: PID of the published record ('recid').
    :type pid: `invenio_pidstore.models.PersistentIdentifier`
    :param record: Record for which the SIP should be created.
    :type record: `invenio_records.api.Record`
    :param create_sip_files: If True the SIPFiles will be created.
    :type create_sip_files: bool
    :param user_id: ID of the SIP's user. When falsy, the current user's
        ID is used, or None for an anonymous user.
    :param agent: Agent information. When falsy, it is built with
        ``cls._build_agent_info()``.
    :returns: A Zenodo-specific SIP object.
    :rtype: ``invenio_sipstore.models.SIP``
    """
    if not user_id:
        if current_user.is_anonymous:
            user_id = None
        else:
            user_id = current_user.get_id()
    if not agent:
        agent = cls._build_agent_info()

    with db.session.begin_nested():
        serialized = json.dumps(record.dumps())
        sip = SIP.create('json', serialized, user_id=user_id, agent=agent)
        db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
        if record.files and create_sip_files:
            for record_file in record.files:
                db.session.add(SIPFile(sip_id=sip.id,
                                       filepath=record_file.key,
                                       file_id=record_file.file_id))
    return sip
def create_sips(cls, dump, deposit, files, recid):
    """Create the submission information packages of a dump.

    Nothing is created when ``recid`` is falsy or only reserved. Every
    entry of ``dump.sips`` becomes a SIP linked to ``recid``; the given
    ``files`` are attached to the first SIP only.
    """
    if not recid or recid.status == PIDStatus.RESERVED:
        return

    for position, package in enumerate(dump.sips):
        sip = SIP.create(
            package['format'],
            package['content'],
            user_id=package['user_id'],
            agent=package['agent'],
            id_=package['id'],
        )
        # Keep the creation time recorded in the dump.
        sip.created = package['timestamp']

        # SIP files are only created for the first package.
        if position == 0:
            for meta, file_instance in files:
                sip_file = SIPFile(sip_id=sip.id, filepath=meta['key'],
                                   file_id=file_instance.id)
                db.session.add(sip_file)

        # PID - SIP relationship
        record_sip = RecordSIP(sip_id=sip.id, pid_id=recid.id)
        db.session.add(record_sip)
def create_sips(cls, dump, deposit, files, recid):
    """Create one SIP per dumped package and link each one to ``recid``.

    Returns early, creating nothing, when ``recid`` is falsy or its status
    is RESERVED. The ``files`` are attached to the first SIP only.
    """
    if not recid or recid.status == PIDStatus.RESERVED:
        return

    for index, dumped in enumerate(dump.sips):
        sip = SIP.create(
            dumped['format'],
            dumped['content'],
            user_id=dumped['user_id'],
            agent=dumped['agent'],
            id_=dumped['id'],
        )
        # Reuse the timestamp stored in the dump as the creation date.
        sip.created = dumped['timestamp']

        is_first_package = index == 0
        if is_first_package:
            # Only the first package gets the SIP files.
            for meta, stored_file in files:
                db.session.add(SIPFile(
                    sip_id=sip.id,
                    filepath=meta['key'],
                    file_id=stored_file.id,
                ))

        # PID - SIP relationship
        db.session.add(RecordSIP(sip_id=sip.id, pid_id=recid.id))
def test_create_accessioned_id(db):
    """Check that ``create_accession_id`` prefixes the SIP id with CERN."""
    # A SIP and its archive are needed first.
    sip = SIP.create()
    archive = Archive.create(sip)
    db.session.commit()

    expected = 'CERN-' + str(sip.id)
    assert factories.create_accession_id(archive) == expected
def test_record_sip_model(db):
    """Check that a RecordSIP linking a SIP and a PID can be stored."""
    sip = SIP.create()
    db.session.commit()
    pid = PersistentIdentifier.create('recid', '12345')

    db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
    db.session.commit()

    assert RecordSIP.query.count() == 1
def test_create_accessioned_id(db):
    """Check ``create_accession_id`` against the configured organization.

    The expected identifier is the ``ARCHIVEMATICA_ORGANIZATION_NAME``
    configuration value, a dash, and the SIP id.
    """
    # A SIP and its archive are needed first.
    sip = SIP.create()
    archive = Archive.create(sip)
    db.session.commit()

    organization = current_app.config['ARCHIVEMATICA_ORGANIZATION_NAME']
    expected = organization + '-' + str(sip.id)
    assert factories.create_accession_id(archive) == expected
def test_sip_file_model(db):
    """Check that a SIPFile linking a SIP and a file can be stored."""
    sip = SIP.create('json', '{}')
    file_instance = FileInstance.create()

    db.session.add(SIPFile(sip_id=sip.id, filepath="foobar.zip",
                           file_id=file_instance.id))
    db.session.commit()

    assert SIP.query.count() == 1
    assert SIPFile.query.count() == 1
def test_ArchiveDownload_get_412(db, client, oauth2):
    """Check that downloading an archive without archivematica_id is a 412."""
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id')
    db.session.commit()

    download_url = url_for('invenio_archivematica_api.download_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
    response = client.get(download_url)

    assert response.status_code == 412
def test_oais_fail_transfer(db):
    """Check that ``oais_fail_transfer`` marks the archive as FAILED."""
    # A SIP with its archive
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()

    # The transfer is failed; no extra archive must appear.
    oais_fail_transfer(sip.id)
    assert Archive.query.count() == 1

    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.FAILED
def test_oais_process_transfer(db):
    """Check that ``oais_process_transfer`` updates status and AIP id."""
    # A SIP with its archive
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()

    archivematica_id = uuid.uuid4()
    oais_process_transfer(sip.id, archivematica_id=archivematica_id)
    assert Archive.query.count() == 1

    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.PROCESSING_TRANSFER
    assert archive.archivematica_id == archivematica_id
def test_ArchiveDownload_get_520(db, client, oauth2):
    """Check the download endpoint when no storage server is running.

    ``requests`` is not patched here; the endpoint is expected to answer
    with status code 520.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.REGISTERED
    db.session.commit()

    download_url = url_for('invenio_archivematica_api.download_api',
                           accession_id=archive.accession_id,
                           access_token=oauth2.token)
    response = client.get(download_url)

    assert response.status_code == 520
def test_oais_finish_transfer(db):
    """Check that ``oais_finish_transfer`` registers the archive."""
    # A SIP with its archive
    sip = SIP.create()
    Archive.create(sip)
    archivematica_id = uuid.uuid4()
    db.session.commit()

    # The transfer is finished; no extra archive must appear.
    oais_finish_transfer(sip.id, archivematica_id=archivematica_id)
    assert Archive.query.count() == 1

    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.REGISTERED
    assert archive.archivematica_id == archivematica_id
    assert archive.sip.archived is True
def test_archive_new_sips(db, location):
    """Check that ``archive_new_sips`` honours the age limit and IGNORED."""
    accession_factory = 'invenio_archivematica.factories.create_accession_id'

    # Two SIPs are created three seconds apart.
    sip1 = SIP.create()
    Archive.create(sip1)
    db.session.commit()
    time.sleep(3)
    sip2 = SIP.create()
    Archive.create(sip2)
    db.session.commit()

    # Only the records older than 2 seconds are archived.
    archive_new_sips(accession_factory, days=0, seconds=2, delay=False)
    archives = Archive.query.all()
    assert len(archives) == 2
    for archive in archives:
        if archive.sip_id != sip1.id:
            assert archive.status == ArchiveStatus.NEW
            continue
        assert archive.status == ArchiveStatus.WAITING
        # Flag this archive so that the next run ignores it.
        archive.status = ArchiveStatus.IGNORED
        db.session.commit()

    # Everything is archived now, except the archive of sip1, which was
    # flagged as IGNORED above.
    archive_new_sips(accession_factory, days=0, delay=False)
    archives = Archive.query.all()
    assert len(archives) == 2
    for archive in archives:
        if archive.sip_id != sip1.id:
            assert archive.status == ArchiveStatus.WAITING
            continue
        assert archive.status == ArchiveStatus.IGNORED
def create(cls, archivable, files=None, metadata=None, user_id=None,
           agent=None):
    """Create a SIP together with its files and metadata.

    Apart from the SIP itself, every given file is attached with
    ``attach_file`` and every given metadata entry with
    ``attach_metadata``. Those objects are not returned by this function.
    Once everything is created, the ``sipstore_created`` signal is sent
    with the new API SIP.

    :param bool archivable: tells if the SIP should be archived or not.
        Useful if ``Invenio-Archivematica`` is installed.
    :param files: The list of files to associate with the SIP. See
        :py:func:`invenio_sipstore.api.SIP.attach_file`
    :param dict metadata: A dictionary of metadata. The keys are the
        type (valid :py:class:`invenio_sipstore.models.SIPMetadataType`
        name) and the values are the content (string)
    :param user_id: the ID of the user. If not given, the ID of the
        current user is used, or None when there is no current user or
        the user is anonymous.
    :param agent: If not given, it is built by the factory configured in
        ``SIPSTORE_AGENT_FACTORY``.
    :returns: API SIP object.
    :rtype: :py:class:`invenio_sipstore.api.SIP`
    """
    if not user_id:
        # ``current_user`` is checked for truthiness first so that a
        # missing user does not fail on ``.is_anonymous``.
        # NOTE(review): presumably this happens outside a request
        # context - confirm.
        user_id = (None if not current_user or current_user.is_anonymous
                   else current_user.get_id())
    if not agent:
        agent_factory = import_string(
            current_app.config['SIPSTORE_AGENT_FACTORY'])
        agent = agent_factory()
    files = [] if not files else files
    metadata = {} if not metadata else metadata

    # The SIP and everything attached to it are created in one nested
    # transaction.
    with db.session.begin_nested():
        sip = cls(SIP_.create(user_id=user_id, agent=agent,
                              archivable=archivable))
        for f in files:
            sip.attach_file(f)
        # ``type_`` rather than ``type`` to avoid shadowing the builtin.
        for type_, content in metadata.items():
            sip.attach_metadata(type_, content)
    sipstore_created.send(sip)
    return sip
def test_ArchiveDownload_get_status_code(db, client, oauth2):
    """Check that the download endpoint relays the remote status code.

    ``requests.get`` is patched to answer 404; the endpoint must answer
    with the same status code.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.REGISTERED
    db.session.commit()

    fake_reply = Response()
    fake_reply.status_code = 404

    with patch('requests.get', return_value=fake_reply):
        download_url = url_for('invenio_archivematica_api.download_api',
                               accession_id=archive.accession_id,
                               access_token=oauth2.token)
        response = client.get(download_url)

    assert response.status_code == fake_reply.status_code
def test_Archive_get_status_code(db, client, oauth2):
    """Check that the archive endpoint relays an Archivematica error.

    ``requests.get`` is patched to answer 404 and the real status is
    requested; the endpoint must answer with the same status code.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id',
                             archivematica_id=uuid.uuid4())
    archive.status = ArchiveStatus.WAITING
    db.session.commit()

    fake_reply = Response()
    fake_reply.status_code = 404

    with patch('requests.get', return_value=fake_reply):
        archive_url = url_for('invenio_archivematica_api.archive_api',
                              accession_id=archive.accession_id,
                              access_token=oauth2.token)
        response = client.get(archive_url,
                              data=json.dumps({'realStatus': True}),
                              content_type='application/json')

    assert response.status_code == fake_reply.status_code
def test_sip_metadata_model(db):
    """Check that SIPMetadata is stored with its type and its SIP."""
    sip = SIP.create()
    json_type = SIPMetadataType(title='JSON Test', name='json-test',
                                format='json', schema='url')
    db.session.add(json_type)

    raw_content = '{"title": "great book"}'
    db.session.add(SIPMetadata(sip_id=sip.id, content=raw_content,
                               type=json_type))
    db.session.commit()

    for model in (SIP, SIPMetadataType, SIPMetadata):
        assert model.query.count() == 1

    # Read the metadata back from the database.
    stored = SIPMetadata.query.one()
    assert stored.content == raw_content
    assert stored.type.format == 'json'
    assert stored.sip.id == sip.id
def test_transfer_rsync(app, db, location):
    """Check that ``factories.transfer_rsync`` exports files and metadata.

    A SIP with one metadata entry and one file is exported to a folder
    inside the test location; the exported tree and its content are then
    verified.
    """
    # config
    app.config['SIPSTORE_ARCHIVER_DIRECTORY_BUILDER'] = \
        'helpers:archive_directory_builder'
    app.config['SIPSTORE_ARCHIVER_METADATA_TYPES'] = ['test']

    # SIP with one metadata entry of type 'test'
    sip = SIP.create()
    test_type = SIPMetadataType(title='Test', name='test', format='json')
    db.session.add(test_type)
    expected_metadata = {'title': 'title', 'author': 'me'}
    db.session.add(SIPMetadata(sip=sip, type=test_type,
                               content=json.dumps(expected_metadata)))

    # ... and one file
    file_instance = FileInstance.create()
    expected_bytes = b'weighted companion cube\n'
    file_instance.set_contents(BytesIO(expected_bytes),
                               default_location=location.uri)
    db.session.add(SIPFile(sip=sip, file=file_instance,
                           filepath='portal.txt'))
    db.session.commit()

    # EXPORT
    folder = path.join(location.uri, 'lulz')
    factories.transfer_rsync(sip.id, {
        'server': '',
        'user': '',
        'destination': folder,
        'args': '-az',
    })

    # TEST
    exported_file = path.join(folder, 'files', 'portal.txt')
    exported_metadata = path.join(folder, 'metadata', 'test.json')
    assert not path.exists(path.join(location.uri, 'test'))
    assert path.isdir(folder)
    assert path.isdir(path.join(folder, 'files'))
    assert path.isfile(exported_file)
    assert path.isdir(path.join(folder, 'metadata'))
    assert path.isfile(exported_metadata)
    with open(exported_file, 'rb') as fp:
        assert fp.read() == expected_bytes
    with open(exported_metadata, 'r') as fp:
        assert json.loads(fp.read()) == expected_metadata
def test_Archive_get_200(db, client, oauth2):
    """Check that the archive GET endpoint returns the archive details.

    The archive is created with an ``archivematica_id`` and its status is
    left untouched; the endpoint is expected to report status ``NEW``.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id',
                             archivematica_id=uuid.uuid4())
    db.session.commit()

    archive_url = url_for('invenio_archivematica_api.archive_api',
                          accession_id=archive.accession_id,
                          access_token=oauth2.token)
    response = client.get(archive_url)

    assert response.status_code == 200
    result = json.loads(response.data.decode('utf-8'))
    expected = [
        ('sip_id', str(sip.id)),
        ('status', 'NEW'),
        ('accession_id', 'id'),
        ('archivematica_id', str(archive.archivematica_id)),
    ]
    for key, value in expected:
        assert key in result
        assert result[key] == value
def create(cls, archivable, files=None, metadata=None, user_id=None,
           agent=None):
    """Create a SIP together with its files and metadata.

    Every given file is attached with ``attach_file`` and every given
    metadata entry with ``attach_metadata``; those objects are not
    returned by this function. Once everything is created, the
    ``sipstore_created`` signal is sent with the new API SIP.

    :param bool archivable: tells if the SIP should be archived or not.
        Useful if ``Invenio-Archivematica`` is installed.
    :param files: The list of files to associate with the SIP. See
        :py:func:`invenio_sipstore.api.SIP.attach_file`
    :param dict metadata: A dictionary of metadata. The keys are the
        type (valid :py:class:`invenio_sipstore.models.SIPMetadataType`
        name) and the values are the content (string)
    :param user_id: the ID of the user. If not given, the ID of the
        current user is used, or None when there is no current user or
        the user is anonymous.
    :param agent: If not given, it is built by the factory configured in
        ``SIPSTORE_AGENT_FACTORY``.
    :returns: API SIP object.
    :rtype: :py:class:`invenio_sipstore.api.SIP`
    """
    if not user_id:
        if not current_user or current_user.is_anonymous:
            user_id = None
        else:
            user_id = current_user.get_id()
    if not agent:
        factory_path = current_app.config['SIPSTORE_AGENT_FACTORY']
        agent = import_string(factory_path)()
    if not files:
        files = []
    if not metadata:
        metadata = {}

    with db.session.begin_nested():
        model = SIP_.create(user_id=user_id, agent=agent,
                            archivable=archivable)
        sip = cls(model)
        for attached_file in files:
            sip.attach_file(attached_file)
        for metadata_type, content in metadata.items():
            sip.attach_metadata(metadata_type, content)
    sipstore_created.send(sip)
    return sip
def test_SIP_metadata(db):
    """Check the metadata methods of the API SIP."""
    # A SIP model and a metadata type to attach to
    model = SIP_.create()
    db.session.add(SIPMetadataType(title='JSON Test', name='json-test',
                                   format='json', schema='url'))
    db.session.commit()

    # The API SIP wraps the model and starts without metadata.
    api_sip = SIP(model)
    assert len(api_sip.metadata) == 0

    # Dummy metadata is attached through the API.
    content = json.dumps({'this': 'is', 'not': 'sparta'})
    api_sip.attach_metadata('json-test', content)
    db.session.commit()

    assert len(api_sip.metadata) == 1
    attached = api_sip.metadata[0]
    assert attached.type.format == 'json'
    assert attached.content == content
    assert model.sip_metadata[0].content == content
def test_SIP(db):
    """Check the attributes exposed by the API SIP and ``get_sip``."""
    owner = create_test_user('*****@*****.**')
    agent_info = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}

    # A SIP model, wrapped by an API SIP
    model = SIP_.create(user_id=owner.id, agent=agent_info)
    db.session.commit()
    api_sip = SIP(model)

    assert api_sip.model is model
    assert api_sip.id == model.id
    assert api_sip.user is owner
    assert api_sip.agent == agent_info
    assert api_sip.archivable is True
    assert api_sip.archived is False

    # Setting ``archived`` on the API object is reflected on the model.
    api_sip.archived = True
    db.session.commit()
    assert api_sip.archived is True
    assert model.archived is True

    # test of the get method
    fetched = SIP.get_sip(model.id)
    assert fetched.id == api_sip.id
def test_Archive_patch_200(db, client, oauth2):
    """Check that the archive PATCH endpoint updates the archive.

    The request sends an ``archivematica_id`` and the status ``COMPLETE``;
    the archive is expected to end up ``REGISTERED`` with that id, both in
    the response and in the database.
    """
    sip = SIP.create()
    archive = Archive.create(sip=sip, accession_id='id')
    db.session.commit()

    params = {'archivematica_id': str(uuid.uuid4()), 'status': 'COMPLETE'}
    archive_url = url_for('invenio_archivematica_api.archive_api',
                          accession_id=archive.accession_id,
                          access_token=oauth2.token)
    response = client.patch(archive_url, data=json.dumps(params),
                            content_type='application/json')

    assert response.status_code == 200
    result = json.loads(response.data.decode('utf-8'))
    expected = [
        ('sip_id', str(sip.id)),
        ('status', 'REGISTERED'),
        ('accession_id', 'id'),
        ('archivematica_id', params['archivematica_id']),
    ]
    for key, value in expected:
        assert key in result
        assert result[key] == value

    # The change must be persisted too.
    stored = Archive.query.one()
    assert stored.status == ArchiveStatus.REGISTERED
    assert str(stored.archivematica_id) == params['archivematica_id']
def test_oais_start_transfer(app, db, location):
    """Check ``oais_start_transfer`` on success and on a failing transfer."""
    assert Archive.query.count() == 0

    # A SIP with its archive
    sip = SIP.create()
    Archive.create(sip)
    db.session.commit()
    assert Archive.query.count() == 1

    # Starting the transfer moves the archive to WAITING.
    oais_start_transfer(sip.id, '1991')
    archive = Archive.get_from_sip(sip.id)
    assert archive.status == ArchiveStatus.WAITING
    assert archive.accession_id == '1991'

    # Second case: no archive exists and the transfer fails.
    db.session.delete(archive)
    db.session.commit()
    app.config['ARCHIVEMATICA_TRANSFER_FACTORY'] = 'helpers:transfer_fail'
    assert Archive.query.count() == 0

    oais_start_transfer(sip.id, '1991')
    archive = Archive.get_from_sip(sip.id)
    assert Archive.query.count() == 1
    assert archive.status == ArchiveStatus.FAILED
    assert archive.accession_id == '1991'
    assert archive.sip.archived is False
def sips(db, locations, sip_metadata_types):
    """Fixture providing five API SIPs, some of which share files.

    The SIPs and their files are laid out in the following way:

    SIP-1: File1
    SIP-2: File1, File2
    SIP-3: File2(renamed on SIPFile, but same FileInstance), File3
    SIP-4: File4, File5, File6 (stored under naughty file paths)
    SIP-5: no file, metadata only

    :returns: The API objects of SIP-1 to SIP-5, in that order.
    """
    storage_uri = locations['default'].uri

    # SIP-1: a SIP with agent info
    first_sip = SIP.create(agent={
        'email': '*****@*****.**',
        'orcid': '1111-1111-1111-1111',
        'ip_address': '1.1.1.1'
    })
    first_api = SIPApi(first_sip)
    first_api.attach_metadata('marcxml-test', '<p>XML 1</p>')
    first_api.attach_metadata('json-test', '{"title": "JSON 1"}')
    # Metadata 'txt-test', although attached should not be archived
    # (see conftest configuration)
    first_api.attach_metadata('txt-test', 'Title: TXT 1')
    file1 = FileInstance.create()
    file1.set_contents(BytesIO(b('test')), default_location=storage_uri)
    db_.session.add(SIPFile(sip_id=first_sip.id, filepath="foobar.txt",
                            file_id=file1.id))

    # SIP-2: shares File1 and introduces File2
    second_sip = SIP.create()
    second_api = SIPApi(second_sip)
    second_api.attach_metadata('marcxml-test', '<p>XML 2</p>')
    second_api.attach_metadata('json-test', '{"title": "JSON 2"}')
    file2 = FileInstance.create()
    file2.set_contents(BytesIO(b'test-second'),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=second_sip.id, filepath="foobar.txt",
                file_id=file1.id),
        SIPFile(sip_id=second_sip.id, filepath="foobar2.txt",
                file_id=file2.id),
    ])

    # SIP-3: File2 under another path, plus File3
    third_sip = SIP.create()
    third_api = SIPApi(third_sip)
    third_api.attach_metadata('marcxml-test', '<p>XML 3</p>')
    third_api.attach_metadata('json-test', '{"title": "JSON 3"}')
    file3 = FileInstance.create()
    file3.set_contents(BytesIO(b'test-third'),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=third_sip.id, filepath="foobar2-renamed.txt",
                file_id=file2.id),
        SIPFile(sip_id=third_sip.id, filepath="foobar3.txt",
                file_id=file3.id),
    ])

    # SIP-4: a SIP with naughty filenames
    fourth_sip = SIP.create()
    fourth_api = SIPApi(fourth_sip)
    fourth_api.attach_metadata('marcxml-test', '<p>XML 4 żółć</p>')
    fourth_api.attach_metadata('json-test', '{"title": "JSON 4 żółć"}')
    file4 = FileInstance.create()
    file4.set_contents(BytesIO('test-fourth żółć'.encode('utf-8')),
                       default_location=storage_uri)
    file5 = FileInstance.create()
    file5.set_contents(BytesIO('test-fifth ąęćźə'.encode('utf-8')),
                       default_location=storage_uri)
    file6 = FileInstance.create()
    file6.set_contents(BytesIO('test-sixth π'.encode('utf-8')),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=fourth_sip.id, filepath="../../foobar.txt",
                file_id=file4.id),
        SIPFile(sip_id=fourth_sip.id,
                filepath="http://maliciouswebsite.com/hack.js",
                file_id=file5.id),
        SIPFile(sip_id=fourth_sip.id, filepath="łóżźćąę.dat",
                file_id=file6.id),
    ])

    # SIP-5: a SIP with metadata-only changes
    fifth_sip = SIP.create()
    fifth_api = SIPApi(fifth_sip)
    fifth_api.attach_metadata('marcxml-test', '<p>XML 5 Meta Only</p>')

    db_.session.commit()
    return [first_api, second_api, third_api, fourth_api, fifth_api]
def test_default_archive_directory_builder(app, db):
    """Check the path segments produced by the default archive builder."""
    sip = SIP.create(id_=UUID('abcd0000-1111-2222-3333-444455556666'))

    # The SIP id is split in two 2-character segments and the remainder.
    expected_segments = ['ab', 'cd', '0000-1111-2222-3333-444455556666']
    assert default_archive_directory_builder(sip) == expected_segments
def test_is_archivable_none(db):
    """Check that ``is_archivable_none`` rejects every SIP."""
    # The answer must be falsy whatever the SIP's own archivable flag is.
    for archivable in (True, False):
        sip = SIP.create(archivable=archivable)
        assert not factories.is_archivable_none(sip)
def create_files_and_sip(deposit, dep_pid):
    """Create the bucket, the files and the SIPs of a deposit.

    A bucket is created and linked to the deposit, every entry of
    ``deposit['files']`` becomes a FileInstance/ObjectVersion recorded in
    ``deposit['_files']``, and every entry of ``deposit['sips']`` becomes
    a SIP. When the SIPs refer to a recid, each SIP is linked to it. The
    files are attached to the first SIP only. The deposit and the database
    session are committed at the end.

    :param deposit: The deposit to process; it is modified in place.
    :param dep_pid: The PID of the deposit; its ``pid_value`` is used in
        log messages and errors.
    :returns: The given deposit.
    :raises DepositMultipleRecids: If the SIPs refer to several recids.
    :raises DepositSIPUserDoesNotExist: If a SIP refers to an unknown user.
    :raises DepositRecidDoesNotExist: If the recid has no 'recid' PID.
    """
    from invenio_db import db
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier
    from invenio_records_files.models import RecordsBuckets
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile

    bucket = Bucket.create()
    db.session.add(RecordsBuckets(record_id=deposit.id, bucket_id=bucket.id))
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])

    # All the SIPs must refer to the same recid.
    recid = None
    if sips:
        recids = [int(entry['metadata']['recid']) for entry in sips]
        if len(set(recids)) > 1:
            logger.error('Multiple recids ({recids}) found in deposit {depid}'
                         ' does not exists.'.format(
                             recids=recids, depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value, list(set(recids)))
        elif recids:
            # If only one recid
            recid = recids[0]

    # Store the path -> FileInstance mappings for SIPFile creation later
    path_instances = []
    for file_info in files:
        instance = FileInstance.create()
        instance.set_uri(file_info['path'], file_info['size'],
                         file_info['checksum'])
        version = ObjectVersion.create(bucket, file_info['name'],
                                       _file_id=instance.id)
        deposit['_files'].append({
            'bucket': str(bucket.id),
            'key': file_info['name'],
            'checksum': file_info['checksum'],
            'size': file_info['size'],
            'version_id': str(version.version_id),
        })
        path_instances.append((file_info['path'], instance))

    for position, sip_info in enumerate(sips):
        agent = None
        user_id = None
        if sip_info['agents']:
            # Only the first agent is taken into account.
            first_agent = sip_info['agents'][0]
            agent = {
                'ip_address': first_agent.get('ip_address', ""),
                'email': first_agent.get('email_address', ""),
            }
            user_id = first_agent['user_id']
        content = sip_info['package']
        try:
            sip = SIP.create('marcxml', content, user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            raise DepositSIPUserDoesNotExist(dep_pid.pid_value, user_id)

        # If recid was found, attach it to SIP
        # TODO: This is always uses the first recid, as we quit if multiple
        # recids are found in the sips information
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=recid)
                db.session.add(RecordSIP(sip_id=sip.id, pid_id=pid.id))
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                raise DepositRecidDoesNotExist(dep_pid.pid_value, recid)

        # The deposit files are attached to the first SIP only.
        if position == 0:
            for filepath, instance in path_instances:
                db.session.add(SIPFile(sip_id=sip.id, filepath=filepath,
                                       file_id=instance.id))

    deposit.commit()
    db.session.commit()
    return deposit
def create_files_and_sip(deposit, dep_pid):
    """Create deposit Bucket, Files and SIPs.

    The function works in four steps:

    1. A bucket is created, linked to the deposit and recorded in
       ``deposit['_buckets']``.
    2. When the deposit has exactly one draft with ``prereserve_doi``
       values, the pre-reserved recid and DOI are reserved if they do not
       exist yet.
    3. Every entry of ``deposit['files']`` becomes a
       FileInstance/ObjectVersion recorded in ``deposit['_files']``.
    4. Every entry of ``deposit['sips']`` becomes a SIP, linked to the
       recid found in the SIPs. The files are attached to the first SIP
       only.

    The deposit is committed at the end.

    :param deposit: The deposit to process; it is modified in place.
    :param dep_pid: The PID of the deposit, used in log messages and
        errors.
    :returns: The given deposit.
    :raises DepositMultipleRecids: If the SIPs refer to several recids.
    """
    from invenio_pidstore.errors import PIDDoesNotExistError
    from invenio_pidstore.models import PersistentIdentifier, PIDStatus
    from invenio_sipstore.errors import SIPUserDoesNotExist
    from invenio_sipstore.models import SIP, RecordSIP, SIPFile
    from invenio_files_rest.models import Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_db import db

    buc = Bucket.create()
    recbuc = RecordsBuckets(record_id=deposit.id, bucket_id=buc.id)
    db.session.add(recbuc)
    deposit.setdefault('_deposit', dict())
    deposit.setdefault('_buckets', dict(deposit=str(buc.id)))
    deposit.setdefault('_files', list())
    files = deposit.get('files', [])
    sips = deposit.get('sips', [])

    # Look for prereserved DOI (and recid)
    if 'drafts' in deposit:
        drafts = list(deposit['drafts'].items())
        if len(drafts) != 1:
            # ``logger.error`` rather than ``logger.exception``: no
            # exception is being handled here, so there is no traceback
            # to attach to the message.
            logger.error('Deposit {dep_pid} has multiple drafts'.format(
                dep_pid=dep_pid))
        else:
            _, draft = drafts[0]
            draft_v = draft['values']
            if 'prereserve_doi' in draft_v:
                pre_recid = str(draft_v['prereserve_doi']['recid'])
                pre_doi = str(draft_v['prereserve_doi']['doi'])

                # If pre-reserve info available, try to reserve 'recid'
                try:
                    PersistentIdentifier.get(pid_type='recid',
                                             pid_value=str(pre_recid))
                except PIDDoesNotExistError:
                    # Reserve recid
                    PersistentIdentifier.create(
                        pid_type='recid',
                        pid_value=str(pre_recid),
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                # If pre-reserve info available, try to reserve 'doi'
                try:
                    PersistentIdentifier.get(pid_type='doi',
                                             pid_value=str(pre_doi))
                except PIDDoesNotExistError:
                    # Reserve DOI
                    PersistentIdentifier.create(
                        pid_type='doi',
                        pid_value=str(pre_doi),
                        object_type='rec',
                        status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(pre_recid)) is None:
                    RecordIdentifier.insert(int(pre_recid))

    # Store the path -> FileInstance mappings for SIPFile creation later
    dep_file_instances = list()
    for file_ in files:
        size = file_['size']
        key = file_['name']
        # Warning: Assumes all checksums are MD5!
        checksum = 'md5:{0}'.format(file_['checksum'])
        fi = FileInstance.create()
        fi.set_uri(file_['path'], size, checksum)
        ov = ObjectVersion.create(buc, key, _file_id=fi.id)
        # The file type is the lower-cased extension without its dot.
        ext = splitext(ov.key)[1].lower()
        if ext.startswith('.'):
            ext = ext[1:]
        file_meta = dict(
            bucket=str(ov.bucket.id),
            key=ov.key,
            checksum=ov.file.checksum,
            size=ov.file.size,
            version_id=str(ov.version_id),
            type=ext,
        )
        deposit['_files'].append(file_meta)
        dep_file_instances.append((file_['path'], fi))

    # Get a recid from SIP information
    recid = None
    if sips:
        recids = [int(sip['metadata']['recid']) for sip in sips]
        if len(set(recids)) > 1:
            logger.error('Multiple recids ({recids}) found in deposit {depid}'
                         ' does not exists.'.format(
                             recids=recids, depid=dep_pid.pid_value))
            raise DepositMultipleRecids(dep_pid.pid_value, list(set(recids)))
        elif recids:
            # If only one recid
            recid = recids[0]

    for idx, sip in enumerate(sips):
        agent = None
        user_id = None
        if sip['agents']:
            # Only the first agent is taken into account.
            agent = dict(
                ip_address=empty_str_if_none(sip['agents'][0].get(
                    'ip_address', "")),
                email=empty_str_if_none(sip['agents'][0].get(
                    'email_address', "")),
            )
            user_id = sip['agents'][0]['user_id']
        # A user ID of 0 is treated as "no user".
        if user_id == 0:
            user_id = None
        content = sip['package']
        sip_format = 'marcxml'
        try:
            sip = SIP.create(sip_format, content, user_id=user_id,
                             agent=agent)
        except SIPUserDoesNotExist:
            logger.exception('User ID {user_id} referred in deposit {depid} '
                             'does not exists.'.format(
                                 user_id=user_id, depid=dep_pid.pid_value))
            # Fall back to a SIP without user.
            sip = SIP.create(sip_format, content, agent=agent)

        # Attach recid to SIP
        if recid:
            try:
                pid = PersistentIdentifier.get(pid_type='recid',
                                               pid_value=str(recid))
                record_sip = RecordSIP(sip_id=sip.id, pid_id=pid.id)
                db.session.add(record_sip)
            except PIDDoesNotExistError:
                logger.exception('Record {recid} referred in '
                                 'Deposit {depid} does not exists.'.format(
                                     recid=recid, depid=dep_pid.pid_value))
                # The comparison is kept explicit: only a value equal to
                # True selects the "was submitted" message.
                if deposit['_p']['submitted'] == True:  # noqa: E712
                    logger.exception('Pair {recid}/{depid} was submitted,'
                                     ' (should it be unpublished?).'.format(
                                         recid=recid,
                                         depid=dep_pid.pid_value))
                else:
                    logger.exception(
                        'Pair {recid}/{depid} was not submitted.'.format(
                            recid=recid, depid=dep_pid.pid_value))

                # Reserve recid
                PersistentIdentifier.create(pid_type='recid',
                                            pid_value=str(recid),
                                            object_type='rec',
                                            status=PIDStatus.RESERVED)

                if RecordIdentifier.query.get(int(recid)) is None:
                    RecordIdentifier.insert(int(recid))

        # The deposit files are attached to the first SIP only.
        if idx == 0:
            for fp, fi in dep_file_instances:
                sipf = SIPFile(sip_id=sip.id, filepath=fp, file_id=fi.id)
                db.session.add(sipf)

    deposit.commit()
    return deposit
def sips(db, locations, sip_metadata_types):
    """Fixture providing five API SIPs, some of which share files.

    The SIPs and their files are laid out in the following way:

    SIP-1: File1
    SIP-2: File1, File2
    SIP-3: File2(renamed on SIPFile, but same FileInstance), File3
    SIP-4: File4, File5, File6 (stored under naughty file paths)
    SIP-5: no file, metadata only

    :returns: The API objects of SIP-1 to SIP-5, in that order.
    """
    storage_uri = locations['default'].uri

    # SIP-1: a SIP with agent info
    first_sip = SIP.create(
        agent={
            'email': '*****@*****.**',
            'orcid': '1111-1111-1111-1111',
            'ip_address': '1.1.1.1'
        })
    first_api = SIPApi(first_sip)
    first_api.attach_metadata('marcxml-test', '<p>XML 1</p>')
    first_api.attach_metadata('json-test', '{"title": "JSON 1"}')
    # Metadata 'txt-test', although attached should not be archived
    # (see conftest configuration)
    first_api.attach_metadata('txt-test', 'Title: TXT 1')
    file1 = FileInstance.create()
    file1.set_contents(BytesIO(b('test')), default_location=storage_uri)
    db_.session.add(SIPFile(sip_id=first_sip.id, filepath="foobar.txt",
                            file_id=file1.id))

    # SIP-2: shares File1 and introduces File2
    second_sip = SIP.create()
    second_api = SIPApi(second_sip)
    second_api.attach_metadata('marcxml-test', '<p>XML 2</p>')
    second_api.attach_metadata('json-test', '{"title": "JSON 2"}')
    file2 = FileInstance.create()
    file2.set_contents(BytesIO(b'test-second'),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=second_sip.id, filepath="foobar.txt",
                file_id=file1.id),
        SIPFile(sip_id=second_sip.id, filepath="foobar2.txt",
                file_id=file2.id),
    ])

    # SIP-3: File2 under another path, plus File3
    third_sip = SIP.create()
    third_api = SIPApi(third_sip)
    third_api.attach_metadata('marcxml-test', '<p>XML 3</p>')
    third_api.attach_metadata('json-test', '{"title": "JSON 3"}')
    file3 = FileInstance.create()
    file3.set_contents(BytesIO(b'test-third'),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=third_sip.id, filepath="foobar2-renamed.txt",
                file_id=file2.id),
        SIPFile(sip_id=third_sip.id, filepath="foobar3.txt",
                file_id=file3.id),
    ])

    # SIP-4: a SIP with naughty filenames
    fourth_sip = SIP.create()
    fourth_api = SIPApi(fourth_sip)
    fourth_api.attach_metadata('marcxml-test', '<p>XML 4 żółć</p>')
    fourth_api.attach_metadata('json-test', '{"title": "JSON 4 żółć"}')
    file4 = FileInstance.create()
    file4.set_contents(BytesIO('test-fourth żółć'.encode('utf-8')),
                       default_location=storage_uri)
    file5 = FileInstance.create()
    file5.set_contents(BytesIO('test-fifth ąęćźə'.encode('utf-8')),
                       default_location=storage_uri)
    file6 = FileInstance.create()
    file6.set_contents(BytesIO('test-sixth π'.encode('utf-8')),
                       default_location=storage_uri)
    db_.session.add_all([
        SIPFile(sip_id=fourth_sip.id, filepath="../../foobar.txt",
                file_id=file4.id),
        SIPFile(sip_id=fourth_sip.id,
                filepath="http://maliciouswebsite.com/hack.js",
                file_id=file5.id),
        SIPFile(sip_id=fourth_sip.id, filepath="łóżźćąę.dat",
                file_id=file6.id),
    ])

    # SIP-5: a SIP with metadata-only changes
    fifth_sip = SIP.create()
    fifth_api = SIPApi(fifth_sip)
    fifth_api.attach_metadata('marcxml-test', '<p>XML 5 Meta Only</p>')

    db_.session.commit()
    return [first_api, second_api, third_api, fourth_api, fifth_api]