def sip_metadata_types(db):
    """Store the SIP metadata types used by the tests and return them.

    Four types are created and committed: the BagIt archiver type (internal
    use only), a JSON record type, a MARCXML record type and a raw text type.

    :param db: database object whose ``session`` receives the new types.
    :returns: mapping from type name to the created ``SIPMetadataType``,
        covering all four types.
    :rtype: dict
    """
    created = [
        SIPMetadataType(
            title='BagIt Archiver Metadata',
            name=BagItArchiver.bagit_metadata_type_name,
            format='json',
        ),
        SIPMetadataType(
            title='Record JSON Metadata',
            name='json-test',
            format='json',
        ),
        SIPMetadataType(
            title='Record MARCXML Metadata',
            name='marcxml-test',
            format='xml',
        ),
        SIPMetadataType(
            title='Raw Text Metadata',
            name='txt-test',
            format='txt',
        ),
    ]
    for mtype in created:
        db.session.add(mtype)
    db.session.commit()
    return {mtype.name: mtype for mtype in created}
def sip_metadata_types(db):
    """Store the SIP metadata types used by the tests and return a subset.

    Four types are created and committed (BagIt archiver, JSON, MARCXML and
    raw text), but only the first three are part of the returned mapping.

    :param db: database object whose ``session`` receives the new types.
    :returns: mapping from type name to ``SIPMetadataType`` for the BagIt,
        JSON and MARCXML types (the raw text type is left out).
    :rtype: dict
    """
    returned = [
        SIPMetadataType(
            title='BagIt Archiver Metadata',
            name=BagItArchiver.bagit_metadata_type_name,
            format='json',
        ),
        SIPMetadataType(
            title='Record JSON Metadata',
            name='json-test',
            format='json',
        ),
        SIPMetadataType(
            title='Record MARCXML Metadata',
            name='marcxml-test',
            format='xml',
        ),
    ]
    # The type 'txt-test' is intentionally omitted from the configuration
    # (SIPSTORE_ARCHIVER_METADATA_TYPES). It should not be archived in any of
    # the tests.
    txt_only = SIPMetadataType(
        title='Raw Text Metadata',
        name='txt-test',
        format='txt',
    )
    for mtype in returned + [txt_only]:
        db.session.add(mtype)
    db.session.commit()
    return {mtype.name: mtype for mtype in returned}
def test_SIP_create(app, db, mocker):
    """Test the create method from SIP API.

    A SIP is first created with an explicit user and agent, then created
    again with the current user mocked as anonymous, in which case no user
    and an empty agent are expected.

    :param app: application object whose ``config`` is modified.
    :param db: database object providing the ``session``.
    :param mocker: pytest-mock fixture used to patch ``current_user``.
    """
    # Set up a file storage location backed by a temporary directory.
    tmppath = tempfile.mkdtemp()
    # try/finally guarantees the directory is removed even when an
    # assertion below fails.
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()

        # Create a file.
        content = b'test lol\n'
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, 'test.txt',
                                   stream=BytesIO(content))
        db.session.commit()
        files = [obj]

        # Set up the metadata types and the metadata itself.
        mjson = SIPMetadataType(title='JSON Test', name='json-test',
                                format='json', schema='url')
        marcxml = SIPMetadataType(title='MARC XML Test', name='marcxml-test',
                                  format='xml', schema='uri')
        db.session.add(mjson)
        db.session.add(marcxml)
        metadata = {
            'json-test': json.dumps({
                'this': 'is',
                'not': 'sparta'
            }),
            'marcxml-test': '<record></record>'
        }

        # Create a SIP with an explicit user and agent.
        user = create_test_user('*****@*****.**')
        agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
        sip = SIP.create(True, files=files, metadata=metadata,
                         user_id=user.id, agent=agent)
        db.session.commit()
        assert SIP_.query.count() == 1
        assert len(sip.files) == 1
        assert len(sip.metadata) == 2
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 2
        assert sip.user.id == user.id
        assert sip.agent == agent

        # Mock the current user as anonymous and check that creation still
        # works without an explicit user or agent.
        # NOTE(review): this config change is not restored afterwards --
        # confirm the ``app`` fixture is not shared between tests.
        app.config['SIPSTORE_AGENT_JSONSCHEMA_ENABLED'] = False
        mock_current_user = mocker.patch('invenio_sipstore.api.current_user')
        type(mock_current_user).is_anonymous = mocker.PropertyMock(
            return_value=True)
        sip = SIP.create(True, files=files, metadata=metadata)
        assert sip.model.user_id is None
        assert sip.user is None
        assert sip.agent == {}
    finally:
        # Finalization: always remove the temporary storage.
        rmtree(tmppath)
def test_sip_metadatatype_model(db):
    """Check that a SIPMetadataType can be stored and fetched back by id.

    :param db: database object providing the ``session``.
    """
    fields = dict(title='JSON Test', name='json-test', format='json',
                  schema='url')
    stored = SIPMetadataType(**fields)
    db.session.add(stored)
    db.session.commit()

    # Exactly one type exists and every field survives the round trip.
    assert SIPMetadataType.query.count() == 1
    fetched = SIPMetadataType.get(stored.id)
    assert fetched.title == 'JSON Test'
    assert fetched.format == 'json'
    assert fetched.name == 'json-test'
    assert fetched.schema == 'url'
def create(cls, pid, record, archivable, create_sip_files=True,
           user_id=None, agent=None, sip_metadata_type=None):
    """Create a SIP, from the PID and the Record.

    Apart from the SIP itself, it also creates ``RecordSIP`` for the
    SIP-PID-Record relationship, as well as ``SIPFile`` objects for each of
    the files in the record, along with ``SIPMetadata`` for the metadata.
    Those objects are not returned by this function but can be fetched by
    the corresponding RecordSIP attributes ``sip``, ``sip.files`` and
    ``sip.metadata``.

    :param pid: PID of the published record ('recid').
    :type pid: :py:class:`invenio_pidstore.models.PersistentIdentifier`
    :param record: Record for which the SIP should be created.
    :type record: :py:class:`invenio_records.api.Record`
    :param bool archivable: tells if the record should be archived.
        Useful when ``Invenio-Archivematica`` is installed.
    :param bool create_sip_files: If True the SIPFiles will be created.
    :param user_id: forwarded unchanged to ``SIP.create``.
    :param agent: forwarded unchanged to ``SIP.create``.
    :param str sip_metadata_type: Used to fetch the
        :py:class:`invenio_sipstore.models.SIPMetadataType` by ``name``.
        If not provided, the ``$schema`` attribute of ``record`` will be
        used to determine the
        :py:class:`invenio_sipstore.models.SIPMetadataType`.
        (default: ``None``)
    :returns: RecordSIP object.
    :rtype: :py:class:`invenio_sipstore.api.RecordSIP`
    """
    # Only hand the record's files over when SIPFiles were requested.
    sip_files = None
    if create_sip_files:
        sip_files = record.files

    # An explicit type name wins; otherwise fall back to the record schema.
    if not sip_metadata_type:
        mtype = SIPMetadataType.get_from_schema(record['$schema'])
    else:
        mtype = SIPMetadataType.get_from_name(sip_metadata_type)

    serialized = json.dumps(record.dumps())
    sip_metadata = {mtype.name: serialized}

    with db.session.begin_nested():
        sip = SIP.create(
            archivable,
            files=sip_files,
            metadata=sip_metadata,
            user_id=user_id,
            agent=agent,
        )
        link = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
        db.session.add(link)
        record_sip = cls(link, sip)
    return record_sip
def create(cls, pid, record, archivable, create_sip_files=True,
           user_id=None, agent=None, sip_metadata_type=None):
    """Create a SIP, from the PID and the Record.

    Apart from the SIP itself, it also creates ``RecordSIP`` for the
    SIP-PID-Record relationship, as well as ``SIPFile`` objects for each of
    the files in the record, along with ``SIPMetadata`` for the metadata.
    Those objects are not returned by this function but can be fetched by
    the corresponding RecordSIP attributes ``sip``, ``sip.files`` and
    ``sip.metadata``.

    :param pid: PID of the published record ('recid').
    :type pid: :py:class:`invenio_pidstore.models.PersistentIdentifier`
    :param record: Record for which the SIP should be created.
    :type record: :py:class:`invenio_records.api.Record`
    :param bool archivable: tells if the record should be archived.
        Useful when ``Invenio-Archivematica`` is installed.
    :param bool create_sip_files: If True the SIPFiles will be created.
    :param user_id: forwarded unchanged to ``SIP.create``.
    :param agent: forwarded unchanged to ``SIP.create``.
    :param str sip_metadata_type: Optional name used to fetch the
        :py:class:`invenio_sipstore.models.SIPMetadataType`. When it is not
        provided (the default, ``None``), the ``$schema`` attribute of
        ``record`` determines the metadata type, exactly as before this
        parameter existed.
    :returns: RecordSIP object.
    :rtype: :py:class:`invenio_sipstore.api.RecordSIP`
    """
    files = record.files if create_sip_files else None

    # Let callers pick the metadata type by name; records whose type has no
    # schema registered can then still be archived. Without a name the
    # record's ``$schema`` is used, which keeps existing callers unchanged.
    if sip_metadata_type:
        mtype = SIPMetadataType.get_from_name(sip_metadata_type)
    else:
        mtype = SIPMetadataType.get_from_schema(record['$schema'])
    metadata = {mtype.name: json.dumps(record.dumps())}

    with db.session.begin_nested():
        sip = SIP.create(archivable, files=files, metadata=metadata,
                         user_id=user_id, agent=agent)
        model = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
        db.session.add(model)
        recsip = cls(model, sip)
    return recsip
def attach_metadata(self, type, metadata):
    """Attach a piece of metadata to this SIP.

    The new ``SIPMetadata`` is added to the database session but not
    committed here.

    :param str type: name of the metadata type (a valid
        :py:class:`invenio_sipstore.models.SIPMetadataType` name).
    :param str metadata: the metadata content to attach.
    :returns: the created SIPMetadata
    :rtype: :py:class:`invenio_sipstore.models.SIPMetadata`
    """
    metadata_type = SIPMetadataType.get_from_name(type)
    sip_metadata = SIPMetadata(
        sip_id=self.id,
        type=metadata_type,
        content=metadata,
    )
    db.session.add(sip_metadata)
    return sip_metadata
def archive_directory_builder(sip):
    """Generate a path for BagIt from SIP.

    The revision is read from the SIP's ``json`` metadata
    (``_deposit.pid.revision_id``) when such metadata exists and defaults to
    ``"0"`` otherwise. The record id comes from the PID of the first
    ``RecordSIP`` linked to the SIP.

    :param sip: SIP which is to be archived
    :type sip: invenio_sipstore.models.SIP
    :return: list of str, as produced by ``generate_bag_path``
    """
    lookup_key = (sip.id, SIPMetadataType.get_from_name('json').id)
    json_metadata = SIPMetadata.query.get(lookup_key)

    # Without JSON metadata there is no revision to read; use "0".
    revision = "0"
    if json_metadata is not None:
        payload = json.loads(json_metadata.content)
        revision = str(payload['_deposit']['pid']['revision_id'])

    first_record_sip = sip.model.record_sips[0]
    recid = first_record_sip.pid.pid_value
    return generate_bag_path(recid, revision)
def test_sip_metadata_model(db):
    """Check that a SIPMetadata row links its SIP, type and content.

    :param db: database object providing the ``session``.
    """
    parent_sip = SIP.create()
    json_type = SIPMetadataType(title='JSON Test', name='json-test',
                                format='json', schema='url')
    db.session.add(json_type)

    raw_content = '{"title": "great book"}'
    db.session.add(
        SIPMetadata(sip_id=parent_sip.id, content=raw_content,
                    type=json_type)
    )
    db.session.commit()

    # One row of each kind must exist after the commit.
    assert SIP.query.count() == 1
    assert SIPMetadataType.query.count() == 1
    assert SIPMetadata.query.count() == 1

    # The stored row must point back to the same content, type and SIP.
    stored = SIPMetadata.query.one()
    assert stored.content == raw_content
    assert stored.type.format == 'json'
    assert stored.sip.id == parent_sip.id
def test_transfer_rsync(app, db, location):
    """Test factories.transfer_rsync function.

    A SIP with one metadata entry and one file is exported through
    ``factories.transfer_rsync`` and the resulting folder layout and file
    contents are checked.

    :param app: application object whose ``config`` is modified.
    :param db: database object providing the ``session``.
    :param location: storage location; its ``uri`` hosts the file and the
        export destination.
    """
    # Configuration: directory builder and the metadata types to archive.
    app.config['SIPSTORE_ARCHIVER_DIRECTORY_BUILDER'] = (
        'helpers:archive_directory_builder'
    )
    app.config['SIPSTORE_ARCHIVER_METADATA_TYPES'] = ['test']

    # SIP with its metadata type and one metadata entry.
    sip = SIP.create()
    test_type = SIPMetadataType(title='Test', name='test', format='json')
    db.session.add(test_type)
    expected_metadata = {'title': 'title', 'author': 'me'}
    db.session.add(
        SIPMetadata(sip=sip, type=test_type,
                    content=json.dumps(expected_metadata))
    )

    # One file attached to the SIP.
    instance = FileInstance.create()
    expected_bytes = b'weighted companion cube\n'
    instance.set_contents(BytesIO(expected_bytes),
                          default_location=location.uri)
    db.session.add(SIPFile(sip=sip, file=instance, filepath='portal.txt'))
    db.session.commit()

    # Export.
    folder = path.join(location.uri, 'lulz')
    params = {
        'server': '',
        'user': '',
        'destination': folder,
        'args': '-az'
    }
    factories.transfer_rsync(sip.id, params)

    # Checks on the exported tree.
    files_dir = path.join(folder, 'files')
    metadata_dir = path.join(folder, 'metadata')
    exported_file = path.join(files_dir, 'portal.txt')
    exported_metadata = path.join(metadata_dir, 'test.json')

    assert not path.exists(path.join(location.uri, 'test'))
    assert path.isdir(folder)
    assert path.isdir(files_dir)
    assert path.isfile(exported_file)
    assert path.isdir(metadata_dir)
    assert path.isfile(exported_metadata)
    with open(exported_file, 'rb') as fp:
        assert fp.read() == expected_bytes
    with open(exported_metadata, 'r') as fp:
        assert json.load(fp) == expected_metadata
def test_SIP_metadata(db):
    """Test the metadata methods of API SIP.

    :param db: database object providing the ``session``.
    """
    # Model-level SIP plus the metadata type the API call will look up.
    sip_model = SIP_.create()
    db.session.add(
        SIPMetadataType(title='JSON Test', name='json-test', format='json',
                        schema='url')
    )
    db.session.commit()

    # The API object wrapping the model starts without metadata.
    api_sip = SIP(sip_model)
    assert len(api_sip.metadata) == 0

    # Attach a dummy JSON document through the API.
    payload = json.dumps({'this': 'is', 'not': 'sparta'})
    api_sip.attach_metadata('json-test', payload)
    db.session.commit()

    # It must be visible through both the API object and the model.
    assert len(api_sip.metadata) == 1
    attached = api_sip.metadata[0]
    assert attached.type.format == 'json'
    assert attached.content == payload
    assert sip_model.sip_metadata[0].content == payload
def _get_bagit_metadata_type(cls):
    """Look up the SIPMetadataType used for the BagIt metadata files.

    :returns: the type whose name is ``cls.bagit_metadata_type_name``, as
        returned by ``SIPMetadataType.get_from_name``.
    """
    type_name = cls.bagit_metadata_type_name
    return SIPMetadataType.get_from_name(type_name)
except: pass locrecords = Location(name='records', uri='/eos/workspace/o/oais/archivematica-test/records/', default=True) locarchive = Location( name='archive', # this should go in SIPSTORE_ARCHIVER_LOCATION_NAME uri='/eos/workspace/o/oais/archivematica-test/transfer/') db.session.add(locrecords) db.session.add(locarchive) db.session.commit() # first we create a metadata type with a schema used by the following record mtype = SIPMetadataType( title='Invenio JSON test', name='invenio-json-test', format='json', schema='https://zenodo.org/schemas/deposits/records/record-v1.0.0.json') db.session.add(mtype) db.session.commit() # create record, it needs to use the same schema as the one in the metadata type recid = uuid.uuid4() pid = PersistentIdentifier.create('recid', '1501', object_type='rec', object_uuid=recid, status=PIDStatus.REGISTERED) record = Record.create( { '$schema':
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP.

    Three creations are exercised: with files, without files, and with an
    explicitly named SIP metadata type that has no schema.

    :param db: database object providing the ``session``.
    :param mocker: pytest-mock fixture used to bypass record validation.
    """
    # Set up a file storage location backed by a temporary directory.
    tmppath = tempfile.mkdtemp()
    # try/finally guarantees the directory is removed even when an
    # assertion below fails.
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))

        # Set up the metadata type matching the record schema.
        mtype = SIPMetadataType(title='JSON Test', name='json-test',
                                format='json', schema='url://to/schema')
        db.session.add(mtype)
        db.session.commit()

        # First create a record.
        recid = uuid.uuid4()
        pid = PersistentIdentifier.create('recid', '1337', object_type='rec',
                                          object_uuid=recid,
                                          status=PIDStatus.REGISTERED)
        mocker.patch('invenio_records.api.RecordBase.validate',
                     return_value=True, autospec=True)
        record = Record.create(
            {
                'title': 'record test',
                '$schema': 'url://to/schema'
            },
            recid)

        # Add a file to the record.
        bucket = Bucket.create()
        content = b'Test file\n'
        RecordsBuckets.create(record=record.model, bucket=bucket)
        record.files['test.txt'] = BytesIO(content)
        db.session.commit()

        # Create a SIP.
        user = create_test_user('*****@*****.**')
        agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
        rsip = RecordSIP.create(pid, record, True, user_id=user.id,
                                agent=agent)
        db.session.commit()

        # Test!
        assert RecordSIP_.query.count() == 1
        assert SIP_.query.count() == 1
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 1
        assert len(rsip.sip.files) == 1
        assert len(rsip.sip.metadata) == 1
        metadata = rsip.sip.metadata[0]
        assert metadata.type.format == 'json'
        assert '"title": "record test"' in metadata.content
        assert rsip.sip.archivable is True

        # Try with no files: no new SIPFile, one more SIPMetadata.
        rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                                user_id=user.id, agent=agent)
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 2
        assert len(rsip.sip.files) == 0
        assert len(rsip.sip.metadata) == 1

        # Try with a specific SIP metadata type that has no schema.
        mtype = SIPMetadataType(title='JSON Test 2', name='json-test-2',
                                format='json', schema=None)
        db.session.add(mtype)
        db.session.commit()
        rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                                user_id=user.id, agent=agent,
                                sip_metadata_type='json-test-2')
        assert SIPMetadata.query.count() == 3
        assert len(rsip.sip.metadata) == 1
        assert rsip.sip.metadata[0].type.id == mtype.id
    finally:
        # Finalization: always remove the temporary storage.
        rmtree(tmppath)
def loadsipmetadatatypes(types):
    """Load SIP metadata types.

    :param types: iterable of mappings; each one is expanded into the
        keyword arguments of a new ``SIPMetadataType``.
    """
    # Add every type inside one nested transaction, then commit.
    with db.session.begin_nested():
        for type_kwargs in types:
            new_type = SIPMetadataType(**type_kwargs)
            db.session.add(new_type)
    db.session.commit()