Example #1
def test_record_get_bucket_with_no_bucket(app, db, location):
    """Test retrieval of the bucket when no bucket is associated."""
    record = Record.create({'title': 'test'}, with_bucket=False)
    db.session.commit()
    record = Record.get_record(record.id)
    assert record.bucket is None
    assert record.files is None
Example #2
def create_fake_record(bulk_size, fake):
    """Create records for demo purposes."""
    records_bulk = []
    start = timeit.default_timer()
    for _ in range(bulk_size):
        # Create fake record metadata
        record_data = {
            "contributors": [{
                "name": fake.name()
            }],
            "description": fake.bs(),
            "title": fake.company() + "'s dataset",
        }

        # Create record in DB
        rec_uuid = uuid.uuid4()
        current_pidstore.minters["recid"](rec_uuid, record_data)
        Record.create(record_data, id_=rec_uuid)

        # Add record for bulk indexing
        records_bulk.append(rec_uuid)

    # Commit records to the database
    db.session.commit()
    click.secho(f"Writing {bulk_size} records to the database", fg="green")

    # Bulk index records
    ri = RecordIndexer()
    ri.bulk_index(records_bulk)
    ri.process_bulk_queue()
    current_search.flush_and_refresh(index="records")
    click.secho(f"Sending {bulk_size} records to be indexed", fg="green")
    stop = timeit.default_timer()
    click.secho(f"Creating {bulk_size} records took {stop - start}.",
                fg="green")
Example #3
def create_object(bucket, record_dict):
    """Object creation inside the bucket using the file and its content."""

    rec_uuid = uuid4()
    provider = RecordIdProvider.create(object_type='rec', object_uuid=rec_uuid)

    files_meta, num_of_iiif_valid_files = generate_files_metadata(
        bucket, record_dict['_files'])

    # If there are any IIIF-valid image files, the IIIF manifest API URL is
    # added to the record metadata.
    iiif_manifest_url = ''
    if num_of_iiif_valid_files > 0:
        iiif_manifest_url = '/record/{0}/iiif/manifest.json'.format(
            provider.pid.pid_value)
    deposit_dict = record_dict['_deposit']
    deposit_dict['iiif_manifest'] = iiif_manifest_url

    data = {
        'pid_value': provider.pid.pid_value,
        '_deposit': deposit_dict,
        '_files': files_meta,
    }

    # RecordFile is invenio_records_files.api.Record (imported at module level).
    record = RecordFile.create(data, id_=rec_uuid)

    # connect to record and bucket
    db.session.add(RecordsBuckets(
        record_id=record.id,
        bucket_id=bucket.id,
    ))
    db.session.commit()
Example #4
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #5
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record metadata UUID.
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])

        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
Example #6
def add_oai_information(obj, eng):
    """Adds OAI information like identifier"""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            oai_prefix = current_app.config.get('OAISERVER_ID_PREFIX')
            existing_record['_oai'] = {
                'id': '%s:%s' % (oai_prefix, recid),
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or \
            not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime(
        '%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
Example #7
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                headers = file_.get('headers', {})
                data = requests_retry_session().get(file_['url'],
                                                    headers=headers)

                if data.status_code != 200:
                    __halt_and_notify(
                        "Error during acquiring files.\nHTTP status: %d\nUrl: %s\nHeaders:%s"
                        % (data.status_code, file_['url'], headers), eng)

                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Example #8
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'],
                                                db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDVersioning(record_pid).parent.pid_value == parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
Example #9
def add_oai_information(obj, eng):
    """Adds OAI information like identifier"""

    recid = obj.data['control_number']
    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_oai' not in existing_record:
        try:
            oaiid_minter(pid.object_uuid, existing_record)
        except PIDAlreadyExists:
            existing_record['_oai'] = {
                'id': 'oai:beta.scoap3.org:%s' % recid,
                'sets': _get_oai_sets(existing_record)
            }

    if 'id' not in existing_record['_oai']:
        current_app.logger.info('adding new oai id')
        oaiid_minter(pid.object_uuid, existing_record)

    if 'sets' not in existing_record['_oai'] or not existing_record['_oai']['sets']:
        existing_record['_oai']['sets'] = _get_oai_sets(existing_record)

    existing_record['_oai']['updated'] = datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ')

    existing_record.commit()
    obj.save()
    db.session.commit()
Example #10
def list_db_published_records():
    """A generator for all the published records"""
    query = RecordMetadata.query.filter(RecordMetadata.json is not None)
    for obj in query.all():
        record = Record(obj.json, model=obj)
        if is_publication(record.model):
            yield record
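
A hedged usage sketch for this generator; the counting loop is purely illustrative:

count = 0
for record in list_db_published_records():
    count += 1
    click.echo("published record: {}".format(record.id))
click.echo("total published records: {}".format(count))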
Example #11
def store_record(obj, eng):
    """Stores record in database"""
    if 'italiana di fisica' in obj.data['abstracts'][0]['source'].lower():
        obj.data['abstracts'][0]['source'] = 'Springer/SIF'
    if 'italiana di fisica' in obj.data['acquisition_source']['source'].lower():
        obj.data['acquisition_source']['source'] = 'Springer/SIF'

    obj.data['record_creation_year'] = parse_date(
        obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        pid = scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

        # Index record
        indexer = RecordIndexer()
        indexer.index_by_id(pid.object_uuid)

    except ValidationError as err:
        __halt_and_notify("Validation error: %s. Skipping..." % (err, ), obj,
                          eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", obj, eng)
Example #12
def transfer_cp(uuid, destination):
    """Transfer the files contained in the record to the destination.

    This method is automatically called by the module to transfer the files.
    Depending on your installation, you may want a different behavior
    (e.g. copying between servers). In that case, you can create your own
    factory and register it in the config variable
    :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FACTORY`.

    :param uuid: the id of the record containing files to transfer
    :param destination: the destination folder, i.e. the value of the config
        variable
        :py:data:`invenio_archivematica.config.ARCHIVEMATICA_TRANSFER_FOLDER`.
        It needs to be an absolute path to a folder.
    """
    record = Record.get_record(uuid)
    pid = PersistentIdentifier.get_by_object("recid", "rec", uuid)
    dir_name = join(destination,
                    create_accessioned_id(pid.pid_value, 'recid'))
    try:
        mkdir(dir_name)
    except OSError:
        # The directory may already exist.
        pass
    for fileobj in record.files:
        copyfile(fileobj.file.storage().fileurl,
                 join(dir_name, fileobj.key))
Example #13
def update_record(pid, schema, data):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    record['$schema'] = schema
    record.update(data)
    record.commit()
    return record
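
A minimal hedged sketch of calling update_record; the PID value and the schema URL are placeholder assumptions:

from invenio_pidstore.models import PersistentIdentifier

pid = PersistentIdentifier.get('recid', '42')  # placeholder PID value
record = update_record(
    pid,
    schema='https://example.org/schemas/record-v1.0.0.json',  # assumed schema URL
    data={'title': 'Updated title'},
)
db.session.commit()  # record.commit() creates the revision; this persists it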
Example #14
def update_expired_embargoes():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit((
        current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
        current_app.config['JSONSCHEMAS_HOST'],
        current_app.config.get('APPLICATION_ROOT') or '', '', ''
    ))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client,
            index='records'
        ).query(
            'query_string',
            query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                datetime.now(timezone.utc).isoformat()
            ),
            allow_leading_wildcard=False
        ).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
Example #15
def datacite_register(self,
                      pid_value,
                      record_uuid,
                      max_retries=5,
                      countdown=5):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record metadata UUID.
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a CDS DOI.
        if not is_local_doi(record['doi']) or \
                not current_app.config['DEPOSIT_DATACITE_MINTING_ENABLED']:
            return

        dcp = DataCiteProvider.get(record['doi'])

        url = current_app.config['CDS_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        doc = datacite_v31.serialize(dcp.pid, record)

        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)
        db.session.commit()
    except Exception as exc:
        db.session.rollback()
        raise self.retry(max_retries=max_retries, countdown=countdown, exc=exc)
Example #16
def test_record_create_no_bucket(app, db, location):
    """Test record creation without bucket creation."""
    record = Record.create({}, with_bucket=False)
    db.session.commit()
    assert record.files is None
    assert '_bucket' not in record
    assert '_files' not in record
Example #17
def glossary_terms():
    """Load demo terms records."""
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.termid import \
        cernopendata_termid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/glossary-term-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    glossary_terms_json = glob.glob(os.path.join(data, 'terms', '*.json'))

    for filename in glossary_terms_json:

        click.echo('Loading glossary-terms from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                if "collections" not in data and \
                    not isinstance(
                        data.get("collections", None), basestring):
                    data["collections"] = []
                data["collections"].append({"primary": "Terms"})
                id = uuid.uuid4()
                cernopendata_termid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #18
def test_record_create_files(app, db, location):
    """Test record creation with bucket and files."""
    record = Record.create({'title': 'test'})
    record.files['hello.txt'] = BytesIO(b'Hello world!')
    db.session.commit()
    assert record['_bucket'] == record.bucket_id
    assert record['_files']
Example #19
def test_transfer_cp(db):
    """Test factories.transfer_cp function."""
    # first we create a record
    recid = uuid.uuid4()
    PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    record = Record.create({'title': 'record test'}, recid)
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    db.session.commit()
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Aaah! A headcrab!!!\n'
    record_buckets = RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['crab.txt'] = BytesIO(content)
    # test!
    rec_dir = join(tmppath, create_accessioned_id('1337', 'recid'))
    factories.transfer_cp(record.id, tmppath)
    assert isdir(rec_dir)
    assert isfile(join(rec_dir, 'crab.txt'))
    with open(join(rec_dir, 'crab.txt'), "rb") as f:
        assert f.read() == content
    # finalization
    rmtree(tmppath)
Example #20
def update_expired_embargos():
    """Release expired embargoes every midnight."""
    logger = current_app.logger
    base_url = urlunsplit(
        (current_app.config.get('PREFERRED_URL_SCHEME', 'http'),
         current_app.config['JSONSCHEMAS_HOST'],
         current_app.config.get('APPLICATION_ROOT') or '', '', ''))
    # The task needs to run in a request context as JSON Schema validation
    # will use url_for.
    with current_app.test_request_context('/', base_url=base_url):
        s = B2ShareRecordsSearch(
            using=current_search_client, index='records').query(
                'query_string',
                query='open_access:false AND embargo_date:{{* TO {0}}}'.format(
                    datetime.now(timezone.utc).isoformat()),
                allow_leading_wildcard=False).fields([])
        record_ids = [hit.meta.id for hit in s.scan()]
        if record_ids:
            logger.info('Changing access of {} embargoed publications'
                        ' to public.'.format(len(record_ids)))
        for record in Record.get_records(record_ids):
            logger.debug('Making embargoed publication {} public'.format(
                record.id))
            record['open_access'] = True
            record.commit()
        db.session.commit()

        indexer = RecordIndexer()
        indexer.bulk_index(record_ids)
        indexer.process_bulk_queue()
Example #21
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                request = urllib2.Request(file_['url'],
                                          headers=file_.get('headers', {}))
                f = urllib2.urlopen(request)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[
                file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', obj, eng)
Example #22
def check_records_migration(app):
    """Check that a set of records have been migrated."""
    expected_records = _load_json('expected_records.json')
    for exp_record in expected_records:
        db_record = Record.get_record(exp_record['id'], with_deleted=True)
        assert str(db_record.created) == exp_record['created']
        # If the record is deleted there is no metadata to check
        if db_record.model.json is None:
            continue
        # Check that the parent pid is minted properly
        parent_pid = b2share_parent_pid_fetcher(exp_record['id'], db_record)
        fetched_pid = b2share_record_uuid_fetcher(exp_record['id'], db_record)
        record_pid = PersistentIdentifier.get(fetched_pid.pid_type,
                                              fetched_pid.pid_value)
        assert PIDNodeVersioning(
            record_pid).parent.pid_value == parent_pid.pid_value
        # Remove the parent pid as it has been added by the migration
        db_record['_pid'].remove({
            'type': RecordUUIDProvider.parent_pid_type,
            'value': parent_pid.pid_value,
        })
        # The OAI-PMH identifier has been modified by the migration
        if db_record.get('_oai'):
            oai_prefix = app.config.get('OAISERVER_ID_PREFIX', 'oai:')
            record_id = exp_record['metadata']['_deposit']['id']
            assert db_record['_oai']['id'] == str(oai_prefix) + record_id
            exp_record['metadata']['_oai']['id'] = db_record['_oai']['id']
        assert db_record == exp_record['metadata']
Example #23
def record(app, db):
    """Create a record."""
    record = {'title': 'fuu'}
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example #24
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(file_['url'], headers=file_.get('headers', {}))
                f = StringIO(data.content)
            else:
                f = open(file_['url'])

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
Example #25
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDNodeVersioning and previous record of a record PID.

    :param version_of: record PID.
    """

    from b2share.modules.records.providers import RecordUUIDProvider
    from b2share.modules.records.utils import is_publication

    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    parent_pid = PIDNodeVersioning(pid=child_pid).parents.first()
    version_master = PIDNodeVersioning(pid=parent_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
Example #26
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json')
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))

                    ObjectVersion.create(bucket, filename, _file_id=f.id)
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #27
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
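
A short hedged usage sketch; the schema path and the payload are assumptions:

schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
    'records/docs-v1.0.0.json')  # assumed schema path
record = create_doc({'title': 'About'}, schema)
db.session.commit()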
Example #28
def create_doc(data, schema):
    """Creates a new doc record."""
    from invenio_records import Record
    id = uuid.uuid4()
    cernopendata_docid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    return record
Example #29
def indexer_receiver(sender, json, doc_type, index, record):
    """To do."""
    from invenio_records_files.api import Record as FileRecord
    record = FileRecord.get_record(record.id)
    for file in record.files:
        if file.get('filetype') == 'raw_text':
            with file.file.storage().open() as f:
                json['fulltext'] = f.read().decode('utf-8').replace('\n', ' ')
Example #30
def minted_record(app, db):
    """Create a test record."""
    data = {'title': 'fuu'}
    with db.session.begin_nested():
        rec_uuid = uuid.uuid4()
        pid = current_pidstore.minters['recid'](rec_uuid, data)
        record = Record.create(data, id_=rec_uuid)
    return pid, record
Example #31
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #32
def test_file_download_ui(app, objects):
    """Test get buckets."""
    app.config.update(dict(
        FILES_REST_PERMISSION_FACTORY=lambda *a, **kw: type(
            'Allow', (object, ), {'can': lambda self: True}
        )(),
        RECORDS_UI_DEFAULT_PERMISSION_FACTORY=None,  # No permission checking
        RECORDS_UI_ENDPOINTS=dict(
            recid=dict(
                pid_type='recid',
                route='/records/<pid_value>',
            ),
            recid_files=dict(
                pid_type='recid',
                route='/records/<pid_value>/files/<filename>',
                view_imp='invenio_records_files.utils:file_download_ui',
                record_class='invenio_records_files.api:Record',
            ),
        )
    ))
    InvenioRecordsUI(app)

    obj1 = objects[0]

    with app.test_request_context():
        # Record 1 - Live record
        rec_uuid = uuid.uuid4()
        PersistentIdentifier.create(
            'recid', '1', object_type='rec', object_uuid=rec_uuid,
            status=PIDStatus.REGISTERED)
        record = Record.create({
            'title': 'Registered',
            'recid': 1,
            '_files': [
                {'key': obj1.key, 'bucket': str(obj1.bucket_id),
                 'checksum': 'invalid'},
            ]
        }, id_=rec_uuid)
        RecordsBuckets.create(record=record.model, bucket=obj1.bucket)
        db.session.commit()

        main_url = url_for('invenio_records_ui.recid', pid_value='1')
        file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename=obj1.key)
        no_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='')
        invalid_file_url = url_for(
            'invenio_records_ui.recid_files', pid_value='1', filename='no')

    with app.test_client() as client:
        res = client.get(main_url)
        assert res.status_code == 200
        res = client.get(file_url)
        assert res.status_code == 200
        res = client.get(no_file_url)
        assert res.status_code == 404
        res = client.get(invalid_file_url)
        assert res.status_code == 404
Example #33
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get("size"),
                              file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #34
def record(app, db):
    """Create a record."""
    record = {
        'title': 'fuu'
    }
    record = Record.create(record)
    record.commit()
    db.session.commit()
    return record
Example #35
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(
        pid_type='recid', pid_value=12345, object_type='rec',
        object_uuid=record.id, status='R')
    db.session.commit()
    return pid, record
Example #36
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id, with_bucket=not skip_files)
    if not skip_files:
        handle_record_files(data, record.bucket, files, skip_files)

    return record
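
A hedged usage sketch of this variant; the schema path and the metadata are assumptions. Passing skip_files=True means with_bucket=False, so no bucket is created:

schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
    'records/record-v1.0.0.json')  # assumed schema path
record = create_record(schema, {'title': 'Demo dataset'}, files=[],
                       skip_files=True)
db.session.commit()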
Example #37
    def publish(self, pid=None, id_=None):
        """Publish a deposit."""
        pid = pid or self.pid

        if not pid.is_registered():
            raise PIDInvalidAction()

        self['_deposit']['status'] = 'published'

        if self['_deposit'].get('pid') is None:  # First publishing
            minter = current_pidstore.minters[
                current_app.config['DEPOSIT_PID_MINTER']
            ]
            id_ = id_ or uuid.uuid4()
            record_pid = minter(id_, self)

            self['_deposit']['pid'] = {
                'type': record_pid.pid_type, 'value': record_pid.pid_value,
                'revision_id': 0,
            }

            data = dict(self.dumps())
            data['$schema'] = self.record_schema

            # During the first publishing, create a snapshot of the bucket.
            @contextmanager
            def process_files(data):
                """Process deposit files."""
                if self.files and self.files.bucket:
                    assert not self.files.bucket.locked
                    self.files.bucket.locked = True
                    snapshot = self.files.bucket.snapshot(lock=True)
                    data['_files'] = self.files.dumps(bucket=snapshot.id)
                    yield data
                    db.session.add(RecordsBuckets(
                        record_id=id_, bucket_id=snapshot.id
                    ))
                else:
                    yield data

            with process_files(data) as data:
                record = Record.create(data, id_=id_)
        else:  # Update after edit
            record_pid, record = self.fetch_published()
            # TODO add support for patching
            assert record.revision_id == self['_deposit']['pid']['revision_id']

            data = dict(self.dumps())
            data['$schema'] = self.record_schema
            record = record.__class__(data, model=record.model)
            record.commit()

        self.commit()
        return self
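
A hedged sketch of the typical call sequence around publish(); the Deposit import path and the payload are assumptions based on invenio-deposit conventions:

from io import BytesIO
from invenio_deposit.api import Deposit  # assumed import path

deposit = Deposit.create({'title': 'My dataset'})
deposit.files['data.csv'] = BytesIO(b'a,b\n1,2\n')
deposit.publish()  # snapshots the bucket and creates the published record
db.session.commit()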
Example #38
def record_with_bucket(full_record, bucket, db):
    """Create a bucket."""
    record = RecordFile.create(full_record)
    RecordsBuckets.create(bucket=bucket, record=record.model)
    pid = PersistentIdentifier.create(pid_type='recid',
                                      pid_value=1,
                                      object_type='rec',
                                      object_uuid=record.id,
                                      status='R')
    db.session.commit()
    return pid, record
Example #39
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization
    rmtree(tmppath)
Example #40
def record(db):
    """Record fixture."""
    rec_uuid = uuid.uuid4()
    provider = RecordIdProvider.create(
        object_type='rec', object_uuid=rec_uuid)
    record = Record.create({
        'control_number': provider.pid.pid_value,
        'title': 'TestDefault',
    }, id_=rec_uuid)
    db.session.commit()
    return record
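
A minimal hedged example of a test consuming this fixture; the assertions are illustrative:

def test_default_title(record):
    """Hypothetical test using the record fixture above."""
    assert record['title'] == 'TestDefault'
    assert 'control_number' in record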
Example #41
def check_dois(record, allrecords, update):
    """ Checks that DOIs of records in the current instance are registered.
    """
    if record:
        record = Record.get_record(record)
        check_record_doi(record, update)
    elif allrecords:
        click.secho('checking DOI for all records')
        for record in list_db_published_records():
            check_record_doi(record, update)
    else:
        raise click.ClickException('Either -r or -a option must be selected')
Example #42
def check_dois(record, allrecords, update):
    """ Checks that DOIs of records in the current instance are registered.
    """
    if record:
        record = Record.get_record(record)
        check_record_doi(record, update)
    elif allrecords:
        click.secho('checking DOI for all records')
        for record in list_db_published_records():
            check_record_doi(record, update)
    else:
        raise click.ClickException('Either -r or -a option must be selected')
Example #43
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
Example #44
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return record
Example #45
def check_embargo(record_id, is_embargoed):
    with app.app_context():
        with app.test_client() as client:
            login_user(non_creator, client)
            # test open_access field in record's metadata
            record = Record.get_record(record_id)
            assert record['open_access'] != is_embargoed
            # test record's file access
            subtest_file_bucket_permissions(
                client, record.files.bucket,
                access_level=None if is_embargoed else 'read',
                is_authenticated=True
            )
Example #46
def test_filesmixin(app, db, location, record):
    """Test bucket creation and assignment."""
    class CustomFilesMixin(FilesMixin):
        def _create_bucket(self):
            return Bucket.create()

    class CustomRecord(Record, CustomFilesMixin):
        pass

    record = CustomRecord.create({})
    assert record.files is not None

    record = Record.create({})
    assert record.files is None
Example #47
def check_pids_migration():
    """Check that the persistent identifiers have been migrated."""
    expected_pids = _load_json('expected_pids.json')
    # Check unchanging properties
    for exp_pid in expected_pids:
        db_pid = PersistentIdentifier.get(exp_pid['pid_type'],
                                          exp_pid['pid_value'])
        for key, value in exp_pid.items():
            if key != 'updated':
                assert str(getattr(db_pid, key)) == str(value)

        # check that deleted PID's records are (soft or hard) deleted
        if exp_pid['status'] == PIDStatus.DELETED.value:
            metadata = None
            try:
                record = Record.get_record(exp_pid['pid_value'],
                                           with_deleted=True)
                # Soft deleted record
                metadata = record.model.json
            except NoResultFound:
                # Hard deleted record
                pass
            assert metadata is None

        # Check versioning relations and PIDs
        if exp_pid['pid_type'] == 'b2dep':
            try:
                rec_pid = PersistentIdentifier.get('b2rec',
                                                   exp_pid['pid_value'])
                # if the deposit is deleted, either the record PID was reserved
                # and has been deleted, or it still exists.
                if db_pid.status == PIDStatus.DELETED:
                    assert rec_pid.status != PIDStatus.RESERVED
            except PIDDoesNotExistError:
                # The record PID was only reserved and has been deleted
                # with the deposit PID.
                assert db_pid.status == PIDStatus.DELETED
                continue

            # Check that a parent pid has been created
            versioning = PIDVersioning(child=rec_pid)
            parent = versioning.parent
            assert rec_pid.status in [PIDStatus.RESERVED, PIDStatus.REGISTERED]
            if rec_pid.status == PIDStatus.RESERVED:
                assert parent.status == PIDStatus.RESERVED
            else:
                assert parent.status == PIDStatus.REDIRECTED
                assert parent.get_redirect() == rec_pid
Example #48
def datacite_register(pid_value, record_uuid):
    """Mint the DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record metadata UUID.
    """
    record = Record.get_record(record_uuid)
    dcp = DataCiteProvider.get(record['doi'])

    url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
        recid=pid_value)
    doc = datacite_v31.serialize(dcp.pid, record)

    if dcp.pid.status == PIDStatus.REGISTERED:
        dcp.update(url, doc)
    else:
        dcp.register(url, doc)
    db.session.commit()
Example #49
def datacite_register(pid_value, record_uuid):
    """Mint DOI and Concept DOI with DataCite.

    :param pid_value: Value of record PID, with pid_type='recid'.
    :type pid_value: str
    :param record_uuid: Record Metadata UUID.
    :type record_uuid: str
    """
    try:
        record = Record.get_record(record_uuid)
        # Bail out if not a Zenodo DOI.
        if not is_local_doi(record['doi']):
            return

        dcp = DataCiteProvider.get(record['doi'])
        doc = datacite_v41.serialize(dcp.pid, record)

        url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
            recid=pid_value)
        if dcp.pid.status == PIDStatus.REGISTERED:
            dcp.update(url, doc)
        else:
            dcp.register(url, doc)

        # If this is the latest record version, update/register the Concept DOI
        # using the metadata of the record.
        recid = PersistentIdentifier.get('recid', str(record['recid']))
        pv = PIDVersioning(child=recid)
        conceptdoi = record.get('conceptdoi')
        if conceptdoi and pv.exists and pv.is_last_child:
            conceptrecid = record.get('conceptrecid')
            concept_dcp = DataCiteProvider.get(conceptdoi)
            url = current_app.config['ZENODO_RECORDS_UI_LINKS_FORMAT'].format(
                recid=conceptrecid)

            doc = datacite_v41.serialize(concept_dcp.pid, record)
            if concept_dcp.pid.status == PIDStatus.REGISTERED:
                concept_dcp.update(url, doc)
            else:
                concept_dcp.register(url, doc)

        db.session.commit()
    except Exception as exc:
        datacite_register.retry(exc=exc)
Example #50
def test_files_protection(app, db, location):
    """Test record files property protection."""
    record = Record.create({})

    bucket = record.files.bucket
    assert bucket

    # Create first file:
    record.files['hello.txt'] = BytesIO(b'Hello world!')

    file_0 = record.files['hello.txt']
    assert 'hello.txt' == file_0['key']
    assert 1 == len(record.files)

    # Lock bucket.
    bucket.locked = True

    assert record.files.bucket.locked
    with pytest.raises(InvalidOperationError):
        del record.files['hello.txt']
Example #51
def find_version_master_and_previous_record(version_of):
    """Retrieve the PIDVersioning and previous record of a record PID.

    :param version_of: record PID.
    """
    try:
        child_pid = RecordUUIDProvider.get(version_of).pid
        if child_pid.status == PIDStatus.DELETED:
            raise RecordNotFoundVersioningError()
    except PIDDoesNotExistError as e:
        raise RecordNotFoundVersioningError() from e

    version_master = PIDVersioning(child=child_pid)

    prev_pid = version_master.last_child
    assert prev_pid.pid_type == RecordUUIDProvider.pid_type
    prev_version = Record.get_record(prev_pid.object_uuid)
    # check that version_of references the last version of a record
    assert is_publication(prev_version.model)
    if prev_pid.pid_value != version_of:
        raise IncorrectRecordVersioningError(prev_pid.pid_value)
    return version_master, prev_version
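
A hedged sketch of calling this helper; the PID value is a placeholder:

try:
    version_master, prev_version = find_version_master_and_previous_record(
        'b2rec-pid-value')  # placeholder PID value
except RecordNotFoundVersioningError:
    version_master, prev_version = None, None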
Example #52
def test_RecordSIP(db):
    """Test RecordSIP API class."""
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    # we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    title = {'title': 'record test'}
    record = Record.create(title, recid)
    # we create the models
    sip = SIP.create(True, user_id=user.id, agent=agent)
    recordsip = RecordSIP_(sip_id=sip.id, pid_id=pid.id)
    db.session.commit()
    # We create an API SIP on top of it
    api_recordsip = RecordSIP(recordsip, sip)
    assert api_recordsip.model is recordsip
    assert api_recordsip.sip.id == sip.id
Example #53
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            old_bucket = record.files.bucket
            bucket = Bucket.get(old_bucket.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket
            ).delete()
            old_bucket.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)
    return record
Example #54
def check_handles(update, record_pid):
    """Allocate handles for a record and its files, if necessary."""
    rec_pid = RecordUUIDProvider.get(pid_value=record_pid).pid
    record = Record.get_record(rec_pid.object_uuid)
    record_updated = False

    pid_list = [p.get('value') for p in record['_pid']
                if p.get('type') == 'ePIC_PID']
    if pid_list:
        click.secho('record {} already has a handle'.format(record_pid), fg='green')
    else:
        click.secho('record {} has no handle'.format(record_pid), fg='red')
        if update:
            b2share_pid_minter(rec_pid, record)
            record_updated = True
            click.secho('    handle added to record', fg='green')
        else:
            click.secho('use -u argument to add a handle to the record')

    files_ok = True
    for f in record.get('_files', []):
        if f.get('ePIC_PID'):
            click.secho('file {} already has a handle'.format(f.get('key')), fg='green')
        else:
            click.secho('file {} has no handle'.format(f.get('key')), fg='red')
            files_ok = False

    if update and not files_ok:
        create_file_pids(record)
        record_updated = True
        click.secho('    files updated with handles', fg='green')
    elif not update and not files_ok:
        click.secho('use -u argument to add handles to the files')

    if record_updated:
        record.commit()
        db.session.commit()
Example #55
def test_cascade_action_record_delete(app, db, location, record_with_bucket,
                                      generic_file, force,
                                      num_of_recordbuckets):
    """Test cascade action on record delete, with force false."""
    record = record_with_bucket
    record_id = record.id
    bucket_id = record.files.bucket.id

    # check before
    assert len(RecordsBuckets.query.all()) == 1
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)

    record.delete(force=force)

    # check after
    db.session.expunge(record.model)
    with pytest.raises(NoResultFound):
        record = Record.get_record(record_id)
    assert len(RecordsBuckets.query.all()) == num_of_recordbuckets
    assert len(Bucket.query.all()) == 1
    assert len(Bucket.query.filter_by(id=bucket_id).all()) == 1
    assert ObjectVersion.get(bucket=bucket_id, key=generic_file)
Example #56
def store_record(obj, eng):
    """Stores record in database"""
    set_springer_source_if_needed(obj)

    obj.data['record_creation_year'] = parse_date(obj.data['record_creation_date']).year

    try:
        record = Record.create(obj.data, id_=None)

        # Create persistent identifier.
        scoap3_recid_minter(str(record.id), record)
        obj.save()
        record.commit()

        # Commit to DB before indexing
        db.session.commit()
        obj.data['control_number'] = record['control_number']
        obj.save()

    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)

    except PIDAlreadyExists:
        __halt_and_notify("Record with this id already in DB", eng)
Example #57
def update_record(obj, eng):
    """Updates existing record"""

    doi = get_first_doi(obj)

    query = {'query': {'bool': {'must': [{'match': {'dois.value': doi}}], }}}
    search_result = es.search(index='records-record', doc_type='record-v1.0.0', body=query)

    recid = search_result['hits']['hits'][0]['_source']['control_number']

    obj.extra_data['recid'] = recid
    obj.data['control_number'] = recid

    pid = PersistentIdentifier.get('recid', recid)
    existing_record = Record.get_record(pid.object_uuid)

    if '_files' in existing_record:
        obj.data['_files'] = existing_record['_files']
    if '_oai' in existing_record:
        obj.data['_oai'] = existing_record['_oai']

    # preserving original creation date
    creation_date = existing_record['record_creation_date']
    obj.data['record_creation_date'] = creation_date
    obj.data['record_creation_year'] = parse_date(creation_date).year
    existing_record.clear()
    existing_record.update(obj.data)

    try:
        existing_record.commit()
        obj.save()
        db.session.commit()
    except ValidationError as err:
        __halt_and_notify("Validation error: %s." % err, eng)
    except SchemaError as err:
        __halt_and_notify('SchemaError during record validation! %s' % err, eng)
Example #58
def test_deposit_versions_create(app, test_records, test_users):
    """Creating new versions of existing records."""
    with app.app_context():
        # Retrieve a record which will be the first version
        v1 = test_records[0].data
        v1_rec = B2ShareRecord.get_record(test_records[0].record_id)
        v1_pid, v1_id = pid_of(v1)
        assert list_published_pids(v1_pid) == [v1_pid]

        # create draft in version chain:
        # version chain becomes: [v1] -- [v2 draft]
        # v2 = create_deposit({}, version_of=v1_id)
        data = copy_data_from_previous(v1_rec.model.json)
        v2 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v1_id)
        assert filenames(v2) == []
        ObjectVersion.create(v2.files.bucket, 'myfile1',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v2) == ['myfile1']

        assert list_published_pids(v1_pid) == [v1_pid]

        # cannot create another draft if one exists
        # not possible: [v1] -- [v2 draft]
        #                    `- [new draft]
        with pytest.raises(DraftExistsVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # cannot create a version from a draft pid
        # not possible: [v1] -- [v2 draft] -- [new draft]
        with pytest.raises(IncorrectRecordVersioningError): # record pid not created yet
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v2['_deposit']['id'])

        # publish previous draft
        # version chain becomes: [v1] -- [v2]
        v2.submit()
        v2.publish()
        v2_pid, v2_id = pid_of(v2)
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]

        # cannot create draft based on the first version in a chain
        # not possible: [v1] -- [v2]
        #                    `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            data = copy_data_from_previous(v1_rec.model.json)
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v1_id)

        # create and publish other versions:
        # version chain becomes: [v1] -- [v2] -- [v3]
        data = copy_data_from_previous(v1_rec.model.json)
        v3 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v2_id)
        # assert files are imported from v2
        assert filenames(v3) == ['myfile1']
        ObjectVersion.create(v3.files.bucket, 'myfile2',
                             stream=BytesIO(b'mycontent'))
        assert filenames(v3) == ['myfile1', 'myfile2']

        assert list_published_pids(v1_pid) == [v1_pid, v2_pid]

        v3.submit()
        v3.publish()
        v3_pid, v3_id = pid_of(v3)
        v3_rec = Record.get_record(v3_id)
        assert filenames(v3_rec) == ['myfile1', 'myfile2']
        assert list_published_pids(v1_pid) == [v1_pid, v2_pid, v3_pid]

        # cannot create draft based on an intermediate version in a chain
        # not possible: [v1] -- [v2] -- [v3]
        #                            `- [new draft]
        with pytest.raises(IncorrectRecordVersioningError):
            create_deposit({}, test_users['deposits_creator'],
                           version_of=v2_id)

        # Create yet another version
        # Version chain becomes: [v1] -- [v2] -- [v3] -- [v4]
        data = copy_data_from_previous(v1_rec.model.json)
        v4 = create_deposit(data, test_users['deposits_creator'],
                            version_of=v3_id)
        v4.submit()
        v4.publish()
        assert filenames(v4) == ['myfile1', 'myfile2']
        v4_pid, v4_id = pid_of(v4)
        assert list_published_pids(v1_pid) == [
            v1_pid, v2_pid, v3_pid, v4_pid]

        # assert that creating a new version from a deleted pid is not allowed
        resolver = Resolver(pid_type=v4_pid.pid_type, object_type='rec',
                            getter=partial(B2ShareRecord.get_record,
                                           with_deleted=True))
        v4_pid, v4_rec = LazyPIDValue(resolver, v4_pid.pid_value).data
        # delete [v4]
        v4_rec.delete()
        with pytest.raises(RecordNotFoundVersioningError):
            create_deposit(data, test_users['deposits_creator'],
                           version_of=v4_id)
Example #59
def alembic_upgrade_database_data(alembic, verbose):
    """Migrate the database data from v2.0.0 to 2.1.0."""
    ### Add versioning PIDs ###
    # Reserve the record PID and versioning PID for unpublished deposits

    # Hack: disable record indexing during record migration
    from invenio_indexer.api import RecordIndexer
    old_index_fn = RecordIndexer.index
    RecordIndexer.index = lambda s, record: None

    if verbose:
        click.secho('migrating deposits and records...')
    with db.session.begin_nested():
        # Migrate published records
        records_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == RecordUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for rec_pid in records_pids:
            if verbose:
                click.secho('    record {}'.format(rec_pid.pid_value))
            try:
                record = Record.get_record(rec_pid.object_uuid)
            except NoResultFound:
                # The record is deleted but not the PID. Fix it.
                rec_pid.status = PIDStatus.DELETED
                continue
            # Create parent version PID
            parent_pid = RecordUUIDProvider.create().pid
            version_master = PIDVersioning(parent=parent_pid)
            version_master.insert_draft_child(child=rec_pid)
            version_master.update_redirect()
            migrate_record_metadata(
                Record.get_record(rec_pid.object_uuid),
                parent_pid
            )

        # Migrate deposits
        deposit_pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == DepositUUIDProvider.pid_type,
            PersistentIdentifier.status == PIDStatus.REGISTERED,
        ).all()
        for dep_pid in deposit_pids:
            if verbose:
                click.secho('    deposit {}'.format(dep_pid.pid_value))
            try:
                deposit = Deposit.get_record(dep_pid.object_uuid)

                if deposit['publication_state'] != \
                        PublicationStates.published.name:
                    # The record is not published yet. Reserve the PID.
                    rec_pid = RecordUUIDProvider.create(
                        object_type='rec',
                        pid_value=dep_pid.pid_value,
                    ).pid
                    # Create parent version PID
                    parent_pid = RecordUUIDProvider.create().pid
                    assert parent_pid
                    version_master = PIDVersioning(parent=parent_pid)
                    version_master.insert_draft_child(child=rec_pid)
                else:
                    # Retrieve previously created version PID
                    rec_pid = RecordUUIDProvider.get(dep_pid.pid_value).pid
                    version_master = PIDVersioning(child=rec_pid)
                    parent_pid = version_master.parent
                    if not parent_pid:
                        click.secho('    record {} was deleted, but the deposit has not been removed'.format(rec_pid.pid_value), fg='red')

                if parent_pid:
                    migrate_record_metadata(
                        Deposit.get_record(dep_pid.object_uuid),
                        parent_pid
                    )
            except NoResultFound:
                # The deposit is deleted but not the PID. Fix it.
                dep_pid.status = PIDStatus.DELETED


    if verbose:
        click.secho('done migrating deposits.')
    RecordIndexer.index = old_index_fn