Exemplo n.º 1
0
def sip_metadata_types(db):
    """Create and persist the SIP metadata types used by the tests.

    Four types are stored: the BagIt archiver type (named after
    ``BagItArchiver.bagit_metadata_type_name``), ``json-test``,
    ``marcxml-test`` and ``txt-test``.

    :param db: database object whose ``session`` is used to add and commit
        the new types.
    :returns: dict mapping the ``name`` of each created type to its
        ``SIPMetadataType`` instance.
    """
    created = [
        SIPMetadataType(
            title='BagIt Archiver Metadata',
            name=BagItArchiver.bagit_metadata_type_name,
            format='json',
        ),
        SIPMetadataType(
            title='Record JSON Metadata',
            name='json-test',
            format='json',
        ),
        SIPMetadataType(
            title='Record MARCXML Metadata',
            name='marcxml-test',
            format='xml',
        ),
        SIPMetadataType(
            title='Raw Text Metadata',
            name='txt-test',
            format='txt',
        ),
    ]

    for mtype in created:
        db.session.add(mtype)
    db.session.commit()

    return {mtype.name: mtype for mtype in created}
Exemplo n.º 2
0
def sip_metadata_types(db):
    """Create and persist the SIP metadata types used by the tests.

    Four types are stored (BagIt, ``json-test``, ``marcxml-test`` and
    ``txt-test``), but only the first three are part of the returned
    mapping.

    :param db: database object whose ``session`` is used to add and commit
        the new types.
    :returns: dict mapping the ``name`` of the BagIt, JSON and MARCXML types
        to their ``SIPMetadataType`` instances.
    """
    returned_types = [
        SIPMetadataType(
            title='BagIt Archiver Metadata',
            name=BagItArchiver.bagit_metadata_type_name,
            format='json',
        ),
        SIPMetadataType(
            title='Record JSON Metadata',
            name='json-test',
            format='json',
        ),
        SIPMetadataType(
            title='Record MARCXML Metadata',
            name='marcxml-test',
            format='xml',
        ),
    ]
    # The type 'txt-test' is intentionally omitted from the configuration
    # (SIPSTORE_ARCHIVER_METADATA_TYPES). It should not be archived in any of
    # the tests, so it is stored but left out of the returned mapping.
    unarchived_type = SIPMetadataType(
        title='Raw Text Metadata',
        name='txt-test',
        format='txt',
    )

    for mtype in returned_types + [unarchived_type]:
        db.session.add(mtype)
    db.session.commit()

    return {mtype.name: mtype for mtype in returned_types}
Exemplo n.º 3
0
def test_SIP_create(app, db, mocker):
    """Test the create method from SIP API.

    A SIP is first created with one file, two metadata entries, an explicit
    user and an agent. A second SIP is then created with ``current_user``
    mocked as anonymous and no agent, to check that the user is ``None`` and
    the agent is an empty dict.

    :param app: application fixture; its config is changed to disable
        ``SIPSTORE_AGENT_JSONSCHEMA_ENABLED`` for the second creation.
    :param db: database fixture used to add and commit the test objects.
    :param mocker: fixture used to patch
        ``invenio_sipstore.api.current_user``.
    """
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    # Everything that uses the temporary storage runs inside ``try`` so the
    # directory is removed even when an assertion fails.
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        db.session.commit()
        # we create a file
        content = b'test lol\n'
        bucket = Bucket.create()
        obj = ObjectVersion.create(bucket, 'test.txt',
                                   stream=BytesIO(content))
        db.session.commit()
        files = [obj]
        # setup metadata
        mjson = SIPMetadataType(title='JSON Test',
                                name='json-test',
                                format='json',
                                schema='url')
        marcxml = SIPMetadataType(title='MARC XML Test',
                                  name='marcxml-test',
                                  format='xml',
                                  schema='uri')
        db.session.add(mjson)
        db.session.add(marcxml)
        metadata = {
            'json-test': json.dumps({
                'this': 'is',
                'not': 'sparta'
            }),
            'marcxml-test': '<record></record>'
        }
        # Let's create a SIP
        user = create_test_user('*****@*****.**')
        agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
        sip = SIP.create(True,
                         files=files,
                         metadata=metadata,
                         user_id=user.id,
                         agent=agent)
        db.session.commit()
        assert SIP_.query.count() == 1
        assert len(sip.files) == 1
        assert len(sip.metadata) == 2
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 2
        assert sip.user.id == user.id
        assert sip.agent == agent
        # we mock the user and the agent to test if the creation works
        app.config['SIPSTORE_AGENT_JSONSCHEMA_ENABLED'] = False
        mock_current_user = mocker.patch('invenio_sipstore.api.current_user')
        type(mock_current_user).is_anonymous = mocker.PropertyMock(
            return_value=True)
        sip = SIP.create(True, files=files, metadata=metadata)
        assert sip.model.user_id is None
        assert sip.user is None
        assert sip.agent == {}
    finally:
        # finalization
        rmtree(tmppath)
Exemplo n.º 4
0
def test_sip_metadatatype_model(db):
    """Test the SIPMetadataType model.

    Stores one type and checks that ``SIPMetadataType.get`` returns it with
    the same title, format, name and schema.
    """
    fields = {
        'title': 'JSON Test',
        'name': 'json-test',
        'format': 'json',
        'schema': 'url',
    }
    stored = SIPMetadataType(**fields)
    db.session.add(stored)
    db.session.commit()
    assert SIPMetadataType.query.count() == 1
    fetched = SIPMetadataType.get(stored.id)
    assert fetched.title == 'JSON Test'
    assert fetched.format == 'json'
    assert fetched.name == 'json-test'
    assert fetched.schema == 'url'
Exemplo n.º 5
0
    def create(cls,
               pid,
               record,
               archivable,
               create_sip_files=True,
               user_id=None,
               agent=None,
               sip_metadata_type=None):
        """Build a SIP for a record and link it to the record's PID.

        Besides the SIP itself, a ``RecordSIP`` model is added for the
        SIP-PID relationship. The record's files (when requested) and its
        JSON-serialized dump are handed to ``SIP.create`` as the SIP files
        and metadata.
        These objects are not returned directly; they are reachable through
        the returned RecordSIP attributes ``sip``, ``sip.files`` and
        ``sip.metadata``.

        :param pid: PID of the published record ('recid').
        :type pid: :py:class:`invenio_pidstore.models.PersistentIdentifier`
        :param record: Record for which the SIP should be created.
        :type record: :py:class:`invenio_records.api.Record`
        :param bool archivable: tells if the record should be archived.
            Useful when ``Invenio-Archivematica`` is installed.
        :param bool create_sip_files: If True, ``record.files`` is passed to
            ``SIP.create``; otherwise ``None`` is passed.
        :param user_id: forwarded unchanged to ``SIP.create``.
            (default: ``None``)
        :param agent: forwarded unchanged to ``SIP.create``.
            (default: ``None``)
        :param str sip_metadata_type: ``name`` used to look up the
            :py:class:`invenio_sipstore.models.SIPMetadataType`.
            When empty or not given, the type is looked up from the
            ``$schema`` attribute of ``record`` instead.
            (default: ``None``)
        :returns: RecordSIP object.
        :rtype: :py:class:`invenio_sipstore.api.RecordSIP`
        """
        sip_files = None
        if create_sip_files:
            sip_files = record.files

        # An explicit type name takes precedence over the record's schema.
        if not sip_metadata_type:
            metadata_type = SIPMetadataType.get_from_schema(record['$schema'])
        else:
            metadata_type = SIPMetadataType.get_from_name(sip_metadata_type)

        sip_metadata = {metadata_type.name: json.dumps(record.dumps())}

        with db.session.begin_nested():
            new_sip = SIP.create(
                archivable,
                files=sip_files,
                metadata=sip_metadata,
                user_id=user_id,
                agent=agent,
            )
            link_model = RecordSIP_(sip_id=new_sip.id, pid_id=pid.id)
            db.session.add(link_model)
            record_sip = cls(link_model, new_sip)
        return record_sip
Exemplo n.º 6
0
    def create(cls, pid, record, archivable, create_sip_files=True,
               user_id=None, agent=None):
        """Build a SIP for a record and link it to the record's PID.

        Besides the SIP itself, a ``RecordSIP`` model is added for the
        SIP-PID relationship. The record's files (when requested) and its
        JSON-serialized dump are handed to ``SIP.create`` as the SIP files
        and metadata. The metadata type is looked up from the record's
        ``$schema`` attribute.
        These objects are not returned directly; they are reachable through
        the returned RecordSIP attributes ``sip``, ``sip.files`` and
        ``sip.metadata``.

        :param pid: PID of the published record ('recid').
        :type pid: :py:class:`invenio_pidstore.models.PersistentIdentifier`
        :param record: Record for which the SIP should be created.
        :type record: :py:class:`invenio_records.api.Record`
        :param bool archivable: tells if the record should be archived.
            Useful when ``Invenio-Archivematica`` is installed.
        :param bool create_sip_files: If True, ``record.files`` is passed to
            ``SIP.create``; otherwise ``None`` is passed.
        :param user_id: forwarded unchanged to ``SIP.create``.
            (default: ``None``)
        :param agent: forwarded unchanged to ``SIP.create``.
            (default: ``None``)
        :returns: RecordSIP object.
        :rtype: :py:class:`invenio_sipstore.api.RecordSIP`
        """
        sip_files = None
        if create_sip_files:
            sip_files = record.files

        metadata_type = SIPMetadataType.get_from_schema(record['$schema'])
        sip_metadata = {metadata_type.name: json.dumps(record.dumps())}

        with db.session.begin_nested():
            new_sip = SIP.create(
                archivable,
                files=sip_files,
                metadata=sip_metadata,
                user_id=user_id,
                agent=agent,
            )
            link_model = RecordSIP_(sip_id=new_sip.id, pid_id=pid.id)
            db.session.add(link_model)
            record_sip = cls(link_model, new_sip)
        return record_sip
Exemplo n.º 7
0
def test_sip_metadatatype_model(db):
    """Test the SIPMetadataType model.

    Persists a single type, then reloads it through ``SIPMetadataType.get``
    and compares every field that was set.
    """
    json_type = SIPMetadataType(
        title='JSON Test',
        name='json-test',
        format='json',
        schema='url',
    )
    db.session.add(json_type)
    db.session.commit()

    assert SIPMetadataType.query.count() == 1

    reloaded = SIPMetadataType.get(json_type.id)
    assert reloaded.title == 'JSON Test'
    assert reloaded.format == 'json'
    assert reloaded.name == 'json-test'
    assert reloaded.schema == 'url'
Exemplo n.º 8
0
    def attach_metadata(self, type, metadata):
        """Attach a piece of metadata to this SIP.

        The new ``SIPMetadata`` is added to the database session; this
        method does not commit it.

        :param str type: name of the metadata type, resolved with
            :py:meth:`invenio_sipstore.models.SIPMetadataType.get_from_name`.
        :param str metadata: the metadata content to attach.
        :returns: the created SIPMetadata
        :rtype: :py:class:`invenio_sipstore.models.SIPMetadata`
        """
        metadata_type = SIPMetadataType.get_from_name(type)
        sip_metadata = SIPMetadata(
            sip_id=self.id,
            type=metadata_type,
            content=metadata,
        )
        db.session.add(sip_metadata)
        return sip_metadata
Exemplo n.º 9
0
    def attach_metadata(self, type, metadata):
        """Create a ``SIPMetadata`` entry for this SIP and stage it.

        The entry is added to ``db.session`` but not committed here.

        :param str type: the type of metadata (a valid
            :py:class:`invenio_sipstore.models.SIPMetadataType` name)
        :param str metadata: the metadata to attach.
        :returns: the created SIPMetadata
        :rtype: :py:class:`invenio_sipstore.models.SIPMetadata`
        """
        resolved_type = SIPMetadataType.get_from_name(type)
        entry = SIPMetadata(sip_id=self.id,
                            type=resolved_type,
                            content=metadata)
        db.session.add(entry)
        return entry
Exemplo n.º 10
0
def archive_directory_builder(sip):
    """Generate a path for BagIt from SIP.

    The revision is read from ``_deposit.pid.revision_id`` in the SIP's
    metadata of type ``'json'``; when the SIP has no such metadata the
    revision is ``"0"``. The record id is the ``pid_value`` of the first
    entry in ``sip.model.record_sips``.

    :param sip: SIP which is to be archived
    :type sip: invenio_sipstore.models.SIP
    :return: the result of ``generate_bag_path(recid, revision)``
        (documented upstream as a list of str)
    """
    metadata_key = (sip.id, SIPMetadataType.get_from_name('json').id)
    json_metadata = SIPMetadata.query.get(metadata_key)

    if json_metadata is None:
        revision = "0"
    else:
        deposit = json.loads(json_metadata.content)['_deposit']
        revision = str(deposit['pid']['revision_id'])

    first_record_sip = sip.model.record_sips[0]
    return generate_bag_path(first_record_sip.pid.pid_value, revision)
Exemplo n.º 11
0
def test_sip_metadata_model(db):
    """Test the SIPMetadata model.

    Stores one SIP, one metadata type and one metadata entry linking them,
    then checks the entry's content, type and SIP after reloading it.
    """
    sip = SIP.create()
    json_type = SIPMetadataType(
        title='JSON Test',
        name='json-test',
        format='json',
        schema='url',
    )
    db.session.add(json_type)

    content = '{"title": "great book"}'
    db.session.add(
        SIPMetadata(sip_id=sip.id, content=content, type=json_type))
    db.session.commit()

    assert SIP.query.count() == 1
    assert SIPMetadataType.query.count() == 1
    assert SIPMetadata.query.count() == 1

    reloaded = SIPMetadata.query.one()
    assert reloaded.content == content
    assert reloaded.type.format == 'json'
    assert reloaded.sip.id == sip.id
Exemplo n.º 12
0
def test_transfer_rsync(app, db, location):
    """Test factories.transfer_rsync function.

    A SIP with one JSON metadata entry and one file is exported with
    ``factories.transfer_rsync`` into a ``lulz`` folder under
    ``location.uri``; the exported files and metadata are then compared
    with what was stored.
    """
    # config
    app.config.update({
        'SIPSTORE_ARCHIVER_DIRECTORY_BUILDER':
            'helpers:archive_directory_builder',
        'SIPSTORE_ARCHIVER_METADATA_TYPES': ['test'],
    })

    # SIP with its metadata type and metadata
    sip = SIP.create()
    test_type = SIPMetadataType(title='Test', name='test', format='json')
    db.session.add(test_type)
    expected_metadata = {'title': 'title', 'author': 'me'}
    sip_metadata = SIPMetadata(
        sip=sip, type=test_type, content=json.dumps(expected_metadata))
    db.session.add(sip_metadata)

    # SIPFile backed by a real file instance
    file_instance = FileInstance.create()
    expected_bytes = b'weighted companion cube\n'
    file_instance.set_contents(
        BytesIO(expected_bytes), default_location=location.uri)
    db.session.add(
        SIPFile(sip=sip, file=file_instance, filepath='portal.txt'))
    db.session.commit()

    # EXPORT
    folder = path.join(location.uri, 'lulz')
    factories.transfer_rsync(sip.id, {
        'server': '',
        'user': '',
        'destination': folder,
        'args': '-az',
    })

    # TEST
    exported_file = path.join(folder, 'files', 'portal.txt')
    exported_metadata = path.join(folder, 'metadata', 'test.json')
    assert not path.exists(path.join(location.uri, 'test'))
    assert path.isdir(folder)
    assert path.isdir(path.join(folder, 'files'))
    assert path.isfile(exported_file)
    assert path.isdir(path.join(folder, 'metadata'))
    assert path.isfile(exported_metadata)
    with open(exported_file, 'rb') as fp:
        assert fp.read() == expected_bytes
    with open(exported_metadata, 'r') as fp:
        assert json.load(fp) == expected_metadata
Exemplo n.º 13
0
def test_SIP_metadata(db):
    """Test the metadata methods of API SIP.

    Wraps a SIP model in the API class, attaches one JSON metadata entry
    with ``attach_metadata`` and checks that it is visible both through the
    API object and through the underlying model.
    """
    # SIP model plus the metadata type the attachment will refer to
    sip_model = SIP_.create()
    json_type = SIPMetadataType(
        title='JSON Test',
        name='json-test',
        format='json',
        schema='url',
    )
    db.session.add(json_type)
    db.session.commit()

    # API SIP on top of the model starts without metadata
    api_sip = SIP(sip_model)
    assert len(api_sip.metadata) == 0

    # attach a dummy metadata entry
    content = json.dumps({'this': 'is', 'not': 'sparta'})
    api_sip.attach_metadata('json-test', content)
    db.session.commit()

    assert len(api_sip.metadata) == 1
    attached = api_sip.metadata[0]
    assert attached.type.format == 'json'
    assert api_sip.metadata[0].content == content
    assert sip_model.sip_metadata[0].content == content
Exemplo n.º 14
0
 def _get_bagit_metadata_type(cls):
     """Look up the SIPMetadataType used for the BagIt metadata files.

     The type is resolved by the name stored in
     ``cls.bagit_metadata_type_name``.
     """
     type_name = cls.bagit_metadata_type_name
     return SIPMetadataType.get_from_name(type_name)
Exemplo n.º 15
0
except:
    pass
# Two storage locations are registered: 'records' (flagged as the default
# location) and 'archive'.
locrecords = Location(name='records',
                      uri='/eos/workspace/o/oais/archivematica-test/records/',
                      default=True)
locarchive = Location(
    name='archive',  # this should go in SIPSTORE_ARCHIVER_LOCATION_NAME
    uri='/eos/workspace/o/oais/archivematica-test/transfer/')
db.session.add(locrecords)
db.session.add(locarchive)
db.session.commit()

# First we create a metadata type whose schema is the one used by the record
# created below.
mtype = SIPMetadataType(
    title='Invenio JSON test',
    name='invenio-json-test',
    format='json',
    schema='https://zenodo.org/schemas/deposits/records/record-v1.0.0.json')
db.session.add(mtype)
db.session.commit()

# Create the record; it needs to use the same schema as the metadata type.
# A registered 'recid' PID with value '1501' is bound to the record's UUID.
recid = uuid.uuid4()
pid = PersistentIdentifier.create('recid',
                                  '1501',
                                  object_type='rec',
                                  object_uuid=recid,
                                  status=PIDStatus.REGISTERED)
record = Record.create(
    {
        '$schema':
Exemplo n.º 16
0
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP.

    Three creations are exercised on the same record: with files, without
    files (``create_sip_files=False``), and with an explicit
    ``sip_metadata_type`` that has no schema.

    :param db: database fixture used to add and commit the test objects.
    :param mocker: fixture used to patch
        ``invenio_records.api.RecordBase.validate`` so that record
        validation always returns ``True``.
    """
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    # Everything that uses the temporary storage runs inside ``try`` so the
    # directory is removed even when an assertion fails.
    try:
        db.session.add(Location(name='default', uri=tmppath, default=True))
        # setup metadata
        mtype = SIPMetadataType(title='JSON Test',
                                name='json-test',
                                format='json',
                                schema='url://to/schema')
        db.session.add(mtype)
        db.session.commit()
        # first we create a record
        recid = uuid.uuid4()
        pid = PersistentIdentifier.create('recid',
                                          '1337',
                                          object_type='rec',
                                          object_uuid=recid,
                                          status=PIDStatus.REGISTERED)
        mocker.patch('invenio_records.api.RecordBase.validate',
                     return_value=True,
                     autospec=True)
        record = Record.create(
            {
                'title': 'record test',
                '$schema': 'url://to/schema'
            }, recid)
        # we add a file to the record
        bucket = Bucket.create()
        content = b'Test file\n'
        RecordsBuckets.create(record=record.model, bucket=bucket)
        record.files['test.txt'] = BytesIO(content)
        db.session.commit()
        # Let's create a SIP
        user = create_test_user('*****@*****.**')
        agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
        rsip = RecordSIP.create(pid, record, True, user_id=user.id,
                                agent=agent)
        db.session.commit()
        # test!
        assert RecordSIP_.query.count() == 1
        assert SIP_.query.count() == 1
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 1
        assert len(rsip.sip.files) == 1
        assert len(rsip.sip.metadata) == 1
        metadata = rsip.sip.metadata[0]
        assert metadata.type.format == 'json'
        assert '"title": "record test"' in metadata.content
        assert rsip.sip.archivable is True
        # we try with no files
        rsip = RecordSIP.create(pid,
                                record,
                                True,
                                create_sip_files=False,
                                user_id=user.id,
                                agent=agent)
        assert SIPFile.query.count() == 1
        assert SIPMetadata.query.count() == 2
        assert len(rsip.sip.files) == 0
        assert len(rsip.sip.metadata) == 1

        # try with specific SIP metadata type
        mtype = SIPMetadataType(title='JSON Test 2',
                                name='json-test-2',
                                format='json',
                                schema=None)  # no schema
        db.session.add(mtype)
        db.session.commit()

        rsip = RecordSIP.create(pid,
                                record,
                                True,
                                create_sip_files=False,
                                user_id=user.id,
                                agent=agent,
                                sip_metadata_type='json-test-2')
        assert SIPMetadata.query.count() == 3
        assert len(rsip.sip.metadata) == 1
        assert rsip.sip.metadata[0].type.id == mtype.id
    finally:
        # finalization
        rmtree(tmppath)
Exemplo n.º 17
0
def loadsipmetadatatypes(types):
    """Load SIP metadata types.

    Each item of ``types`` is expanded as keyword arguments to
    ``SIPMetadataType``. All instances are added inside one nested
    transaction, after which the session is committed.

    :param types: iterable of mappings with ``SIPMetadataType`` keyword
        arguments.
    """
    with db.session.begin_nested():
        for type_kwargs in types:
            metadata_type = SIPMetadataType(**type_kwargs)
            db.session.add(metadata_type)
    db.session.commit()