Example #1
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            default_location=Location.get_default()
        )

        try:
            schema = data.get("$schema", None) \
                .split('/schemas/', 1)[1]
        except (IndexError, AttributeError):
            return None

        if schema:
            _deposit_group = next(
                (depgroup
                 for dg, depgroup
                 in current_app.config.get('DEPOSIT_GROUPS', {}).items()
                 if schema in depgroup['schema']),
                None
            )
            # Guard against schemas that match no configured deposit group.
            if _deposit_group:
                data["_experiment"] = _deposit_group.get(
                    "experiment", "Unknown")

        deposit = super(CAPDeposit, cls).create(data, id_=id_)

        add_owner_permissions(deposit.id)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
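
All of the snippets on this page revolve around the same three-step pattern: create a Bucket, create the record, then link the two through a RecordsBuckets row. A minimal, self-contained sketch of just that pattern (assuming an application context and a default Location exist; the helper name is hypothetical):

from invenio_db import db
from invenio_files_rest.models import Bucket
from invenio_records_files.api import Record
from invenio_records_files.models import RecordsBuckets


def create_record_with_bucket(data):
    """Sketch: create a record and attach an empty bucket to it."""
    record = Record.create(data)
    # With no arguments, Bucket.create() falls back to the default Location.
    bucket = Bucket.create()
    # The RecordsBuckets row links the record's SQLAlchemy model to the bucket.
    RecordsBuckets.create(record=record.model, bucket=bucket)
    db.session.commit()
    return record, bucket
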
Example #2
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
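
Storing str(bucket.id) under data['_buckets'] keeps the bucket id inside the record JSON itself, so the bucket can later be resolved without querying the RecordsBuckets table. A minimal sketch of the reverse lookup (the helper name is hypothetical):

from invenio_files_rest.models import Bucket


def resolve_deposit_bucket(deposit):
    """Sketch: resolve the bucket recorded under the '_buckets' key."""
    return Bucket.get(deposit['_buckets']['deposit'])
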
Example #3
def attach_files(obj, eng):
    if 'files' in obj.extra_data:
        recid = obj.data['control_number']
        pid = PersistentIdentifier.get('recid', recid)
        existing_record = Record.get_record(pid.object_uuid)

        if '_files' not in existing_record or not existing_record['_files']:
            bucket = Bucket.create()
            RecordsBuckets.create(record=existing_record.model, bucket=bucket)

        for file_ in obj.extra_data['files']:
            if file_['url'].startswith('http'):
                data = requests_retry_session().get(
                    file_['url'], headers=file_.get('headers', {}))
                # response.content is bytes, so wrap it in io.BytesIO
                # rather than StringIO
                f = BytesIO(data.content)
            else:
                f = open(file_['url'], 'rb')

            existing_record.files[file_['name']] = f
            existing_record.files[file_['name']]['filetype'] = file_['filetype']

        obj.save()
        existing_record.commit()
        db.session.commit()
    else:
        __halt_and_notify('No files found.', eng)
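
The example above buffers each remote file fully in memory before assigning it to record.files. For large files it may be preferable to stream the response straight into the bucket; a sketch under that assumption (gzip-encoded responses may additionally need resp.raw.decode_content = True):

import requests
from invenio_files_rest.models import ObjectVersion


def attach_remote_file(bucket, key, url):
    """Sketch: stream a remote file into a bucket without buffering it."""
    with requests.get(url, stream=True, timeout=30) as resp:
        resp.raise_for_status()
        # ObjectVersion.create() accepts a file-like stream and writes it
        # to the bucket's storage.
        return ObjectVersion.create(bucket, key, stream=resp.raw)
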
Example #4
 def link_to_record(cls, record, bucket):
     """Link a record its extra formats bucket."""
     if not record.get('_buckets', {}).get('extra_formats'):
         record.setdefault('_buckets', {})
         record['_buckets']['extra_formats'] = str(bucket.id)
         record.commit()
         RecordsBuckets.create(record=record.model, bucket=bucket)
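
The guard makes the call idempotent: the extra-formats bucket is linked at most once per record. Reading the link back is the mirror image; a sketch (the helper name is hypothetical):

from invenio_files_rest.models import Bucket


def get_extra_formats_bucket(record):
    """Sketch: fetch the extra-formats bucket recorded on the record."""
    bucket_id = record.get('_buckets', {}).get('extra_formats')
    return Bucket.get(bucket_id) if bucket_id else None
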
Example #5
def data_policies(skip_files):
    """Load demo Data Policy records."""
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record

    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data')
    data_policies_json = glob.glob(os.path.join(data, '*.json'))

    for filename in data_policies_json:

        click.echo('Loading data-policies from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))
                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
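
Note the use of FileInstance.set_uri() here: the fixture files already exist on storage, so the loader only registers their location, size and checksum instead of copying any bytes. The core of that idea as a stand-alone sketch (the helper name is hypothetical):

from invenio_files_rest.models import FileInstance, ObjectVersion


def register_external_file(bucket, key, uri, size, checksum):
    """Sketch: register an already-stored file without copying it."""
    f = FileInstance.create()
    # set_uri() records where the file lives plus its size and checksum;
    # no file content is moved.
    f.set_uri(uri, size, checksum)
    return ObjectVersion.create(bucket, key, _file_id=f.id)
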
Example #6
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test', name='json-test',
                            format='json', schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create(
        'recid',
        '1337',
        object_type='rec',
        object_uuid=recid,
        status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True, autospec=True)
    record = Record.create(
        {'title': 'record test', '$schema': 'url://to/schema'},
        recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid, record, True, create_sip_files=False,
                            user_id=user.id, agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1
    # finalization
    rmtree(tmppath)
Example #7
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    data['$schema'] = schema
    record = Record.create(data, id_=id)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)

    return record
Example #8
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Example #9
    def clone(self, pid=None, id_=None):
        """Clone a deposit.

        Adds snapshot of the files when deposit is cloned.
        """
        data = copy.deepcopy(self.dumps())
        del data['_deposit']
        deposit = super(CAPDeposit, self).create(data, id_=id_)
        deposit['_deposit']['cloned_from'] = {
            'type': pid.pid_type,
            'value': pid.pid_value,
            'revision_id': self.revision_id,
        }
        bucket = self.files.bucket.snapshot()
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        # optionally we might need to do: deposit.files.flush()
        deposit.commit()
        return deposit
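
The key difference from a plain create() is the bucket: instead of starting empty, the clone receives a snapshot of the source deposit's bucket. A snapshot is cheap because the new bucket's ObjectVersions point at the same FileInstances, so no file content is duplicated. The core idea as a sketch (the helper name is hypothetical):

from invenio_records_files.models import RecordsBuckets


def clone_record_files(src_record, dst_record):
    """Sketch: give dst_record a snapshot of src_record's bucket."""
    bucket = src_record.files.bucket.snapshot()
    RecordsBuckets.create(record=dst_record.model, bucket=bucket)
    return bucket
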
Example #10
def _create_records(base_metadata, total, versions, files):
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            record['_buckets'] = {'record': str(bucket.id)}
            record.commit()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'], object_type='rec',
                object_uuid=record.id, status='R')
            versioning.insert_child(recid)

            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()

            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
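
The recid arithmetic is easiest to see with a worked example: with versions=2, the first record gets conceptrecid 1 and version recids 2 and 3; cur_recid_val then advances by versions + 1 = 3, so the next record starts with conceptrecid 4 and recids 5 and 6, keeping all pid_values unique.
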
Example #11
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(**cls._get_bucket_settings())
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        recid = PersistentIdentifier.get(
            'recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get(
            'recid', str(data['conceptrecid']))
        depid = PersistentIdentifier.get(
            'depid', str(data['_deposit']['id']))

        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Example #12
    def _create_bucket(self, location=None, storage_class=None):
        """Create bucket and return it.

        Note:
            Overrides the base class's ``_create_bucket`` method, which is
            not implemented there. It can create more than one bucket for
            the same parameters. This is a private method; do not call it
            directly. Use ``get_bucket()`` instead.

        Args:
            location (Location): Bucket location object
                (default: 'RECORDS_DEFAULT_FILE_LOCATION_NAME') from config
            storage_class (str): Bucket storage class
                (default: 'RECORDS_DEFAULT_STORAGE_CLASS') from config

        Returns: Bucket for current record, selected location and storage_class
        """
        bucket = Bucket.create(location=location, storage_class=storage_class)
        RecordsBuckets.create(record=self.model, bucket=bucket)
        LOGGER.info("Created bucket",
                    uuid=self.id,
                    class_name=self.__class__.__name__)
        return bucket
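
The docstring steers callers toward get_bucket(). A plausible shape for that wrapper is get-or-create on top of the private method above; this is a sketch, not the project's actual implementation:

from invenio_records_files.models import RecordsBuckets


def get_bucket(self, location=None, storage_class=None):
    """Sketch: return the record's bucket, creating it on first use."""
    records_buckets = RecordsBuckets.query.filter_by(
        record_id=self.id).first()
    if records_buckets:
        return records_buckets.bucket
    return self._create_bucket(location=location,
                               storage_class=storage_class)
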
Example #13
def _process_files(record, files_metadata):
    """Attach files to a record with a given metadata.

    Assumptions:
    - The source must be a URL pointing to a tar file.
    - All files listed in the metadata are inside the source tar.
    - Master files are listed before slaves.
    - The reference from the slave to master is done via key.
    """
    if not files_metadata:
        return
    bucket = Bucket.create(location=Location.get_by_name('videos'))
    RecordsBuckets.create(record=record.model, bucket=bucket)
    response = requests.get(
        files_metadata['source'], stream=True, verify=False)

    # Throw an error for bad status codes
    response.raise_for_status()

    with tempfile.NamedTemporaryFile(suffix='.tar', delete=False) as f:
        for chunk in response:
            f.write(chunk)
    tar = tarfile.open(name=f.name)
    tar.extractall(path=tempfile.gettempdir())
    files_base_dir = os.path.join(tempfile.gettempdir(), tar.getnames()[0])
    tar.close()
    os.remove(f.name)

    for f in files_metadata['metadata']:
        obj = ObjectVersion.create(bucket, f['key'])
        with open(os.path.join(files_base_dir, f['key']), 'rb') as fp:
            obj.set_contents(fp)
        for k, v in f['tags'].items():
            if k == 'master':
                v = ObjectVersion.get(bucket, v).version_id
            ObjectVersionTag.create(obj, k, v)
    shutil.rmtree(files_base_dir)

    record['_files'] = record.files.dumps()
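
The 'master' tag handling is the subtle part: ObjectVersionTag values are plain strings, so the slave file cannot reference the master object directly and stores its version_id instead. Isolated as a sketch (the helper name is hypothetical):

from invenio_files_rest.models import ObjectVersion, ObjectVersionTag


def tag_as_slave(bucket, slave_key, master_key):
    """Sketch: point a subformat at its master file via an object tag."""
    slave = ObjectVersion.get(bucket, slave_key)
    master = ObjectVersion.get(bucket, master_key)
    ObjectVersionTag.create(slave, 'master', str(master.version_id))
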
Example #14
def _create_records(base_metadata, total, versions, files):
    records = []
    cur_recid_val = 1
    for _ in range(total):
        conceptrecid_val = cur_recid_val
        conceptrecid = PersistentIdentifier.create(
            'recid', str(conceptrecid_val), status='R')
        db.session.commit()
        versioning = PIDVersioning(parent=conceptrecid)
        for ver_idx in range(versions):
            recid_val = conceptrecid_val + ver_idx + 1
            data = deepcopy(base_metadata)
            data.update({
                'conceptrecid': str(conceptrecid_val),
                'conceptdoi': '10.1234/{}'.format(recid_val),
                'recid': recid_val,
                'doi': '10.1234/{}'.format(recid_val),
            })
            record = ZenodoRecord.create(data)
            bucket = Bucket.create()
            RecordsBuckets.create(bucket=bucket, record=record.model)
            recid = PersistentIdentifier.create(
                pid_type='recid', pid_value=record['recid'], object_type='rec',
                object_uuid=record.id, status='R')
            versioning.insert_child(recid)

            file_objects = []
            for f in range(files):
                filename = 'Test{0}_v{1}.pdf'.format(f, ver_idx)
                record.files[filename] = BytesIO(b'1234567890')  # 10 bytes
                record.files[filename]['type'] = 'pdf'
                file_objects.append(record.files[filename].obj)
            record.commit()

            db.session.commit()
            records.append((recid, record, file_objects))
        cur_recid_val += versions + 1
    return records
Example #15
    def create(cls, data, id_=None):
        """Create a deposit.

        Adds bucket creation immediately on deposit creation.
        """
        bucket = Bucket.create(
            quota_size=current_app.config['ZENODO_BUCKET_QUOTA_SIZE'],
            max_file_size=current_app.config['ZENODO_MAX_FILE_SIZE'],
        )
        data['_buckets'] = {'deposit': str(bucket.id)}
        deposit = super(ZenodoDeposit, cls).create(data, id_=id_)

        RecordsBuckets.create(record=deposit.model, bucket=bucket)

        recid = PersistentIdentifier.get('recid', str(data['recid']))
        conceptrecid = PersistentIdentifier.get('recid',
                                                str(data['conceptrecid']))
        depid = PersistentIdentifier.get('depid', str(data['_deposit']['id']))

        PIDVersioning(parent=conceptrecid).insert_draft_child(child=recid)
        RecordDraft.link(recid, depid)

        return deposit
Example #16
def update_record(pid, schema, data, files, skip_files):
    """Updates the given record."""
    record = Record.get_record(pid.object_uuid)
    with db.session.begin_nested():
        if record.files and not skip_files:
            bucket_id = record.files.bucket
            bucket = Bucket.get(bucket_id.id)
            for o in ObjectVersion.get_by_bucket(bucket).all():
                o.remove()
                o.file.delete()
            RecordsBuckets.query.filter_by(
                record=record.model,
                bucket=bucket
            ).delete()
            bucket_id.remove()
    db.session.commit()
    record.update(data)
    if not skip_files:
        bucket = Bucket.create()
        handle_record_files(data, bucket, files, skip_files)
        RecordsBuckets.create(
            record=record.model, bucket=bucket)
    return record
Example #17
    def create(cls, data, id_=None, **kwargs):
        """Create a CDS deposit.

        Adds bucket creation immediately on deposit creation.
        """
        if '_deposit' not in data:
            id_ = id_ or uuid.uuid4()
            cls.deposit_minter(id_, data)
        bucket = Bucket.create(location=Location.get_by_name(
            kwargs.get('bucket_location', 'default')))
        data['_buckets'] = {'deposit': str(bucket.id)}
        data.setdefault('_cds', {})
        data['_cds'].setdefault('state', {})
        data.setdefault('keywords', [])
        data.setdefault('license', [{
            'license': 'CERN',
            'material': '',
            'url': 'http://copyright.web.cern.ch',
        }])
        data.setdefault('_access', {})
        deposit = super(CDSDeposit, cls).create(
            data, id_=id_, validator=PartialDraft4Validator)
        RecordsBuckets.create(record=deposit.model, bucket=bucket)
        return deposit
Example #18
def create_record(schema, data, files, skip_files):
    """Creates a new record."""
    bucket = Bucket.create()

    for file in files:
        if skip_files:
            break
        assert 'uri' in file
        assert 'size' in file
        assert 'checksum' in file

        try:
            f = FileInstance.create()
            filename = file.get("uri").split('/')[-1]
            f.set_uri(file.get("uri"), file.get("size"), file.get("checksum"))
            obj = ObjectVersion.create(bucket, filename, _file_id=f.id)

            file.update({
                'bucket': str(obj.bucket_id),
                'checksum': obj.file.checksum,
                'key': obj.key,
                'version_id': str(obj.version_id),
            })

        except Exception as e:
            click.echo('Recid {0} file {1} could not be loaded due '
                       'to {2}.'.format(data.get('recid'), filename, str(e)))
            continue

    id = uuid.uuid4()
    cernopendata_recid_minter(id, data)
    record = Record.create(data, id_=id)
    record['$schema'] = schema
    RecordsBuckets.create(record=record.model, bucket=bucket)

    return record
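
Because each file dict is updated in place with the bucket, key and version_id, the record JSON ends up mirroring what is actually stored. The same effect can be achieved after the fact by serializing the bucket back into the record; a sketch (the helper name is hypothetical):

def sync_files_metadata(record):
    """Sketch: mirror the bucket's contents into the record JSON."""
    # record.files.dumps() serializes every ObjectVersion (key, checksum,
    # size, bucket, version_id), as Example #13 does with '_files'.
    record['_files'] = record.files.dumps()
    record.commit()
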
Example #19
 def _create_buckets(self):
     bucket = Bucket.create()
     RecordsBuckets.create(record=self.model, bucket=bucket)
Example #20
def load_records_with_files(records,
                            upload_dir,
                            max=0,
                            verbose=0,
                            files=True,
                            cache=True,
                            skip=False):
    """Load records with files support.

       It also:
         - create thumbnail for pdf
         - extract text for pdf
         - append files to the bibliographic record

    :param records: list of records in JSON format.
    :param upload_dir: directory for temporary files will be used for cache.
    :param max: max records to load.
    :param verbose: verbose level.
    :param files: attach files if True.
    :param cache: use cache if True.
    :param skip: skip invalid records.
    :returns: list of touched uuids for indexing.
    """
    Logger.verbose = verbose
    rec_uuids = []
    n = 0
    resolver = Resolver('recid', 'rec', Record.get_record)
    if not files:
        warning('files are ignored')
    # load at most `max` records
    if max:
        records = records[:max]
    count = len(records)
    click.secho('Starting to load {0} records ...'.format(count),
                fg='green')

    with click.progressbar(records, length=count) as bar:
        for record in bar:

            # skip the record if it does not contain a document
            if not record.get('document'):
                if verbose > 1:
                    warning('%s does not contain a document' %
                            record.get('recid'))
                continue
            recid = record.get('recid', '-1')
            if recid:
                info('record: %s detected...' % recid)
            update = True
            try:
                # record already exists in db?
                try:
                    pid, rec = resolver.resolve(recid)
                    rec_uuid = pid.object_uuid
                    info('record: %s exists, updating...' % recid)
                    rec.update(record)
                    rec.commit()
                # create new record
                except PIDDoesNotExistError:
                    update = False
                    # generate a new uuid
                    rec_uuid = uuid.uuid4()
                    # create mapping between recid and uuid
                    pid = PersistentIdentifier.create('recid',
                                                      recid,
                                                      object_type='rec',
                                                      object_uuid=rec_uuid)
                    # create Record
                    rec = Record.create(record, id_=rec_uuid)

                    bucket = Bucket.create()
                    RecordsBuckets.create(record=rec.model, bucket=bucket)
                    pid.register()
                    info('%s record created' % rec.get('recid'))
                    oaiid_minter(rec_uuid, rec)
                if files:
                    rec_upload_dir = os.path.join(upload_dir, recid)
                    try:
                        os.makedirs(rec_upload_dir)
                    except FileExistsError:
                        pass
                    for document in record.get('document'):
                        file_name = upload_file(document.get('url'),
                                                rec_upload_dir,
                                                force=not cache)
                        if file_name:
                            name = document.get('name')
                            rec.files[name] = open(file_name, 'rb')
                            rec.files[name]['filetype'] = 'main'
                            append_thumbnail(rec, document, rec_upload_dir,
                                             not cache)
                            append_extracted_text(rec, document,
                                                  rec_upload_dir, not cache)
                            rec.commit()
            except ValidationError as e:
                if not update:
                    pid.delete()
                else:
                    info('Record %s untouched' % recid)
                error('Invalid record (%s)' % recid)
                warning('Validation error: %s' % e)
                if not skip:
                    raise e
                continue
            else:
                db.session.flush()
                # touched record
                rec_uuids.append(rec_uuid)
                n += 1
        db.session.commit()
    return rec_uuids
Example #21
    def create(cls, data, id_=None, owner=current_user):
        """Create a new deposit.

        :param data: metadata; must contain a $schema or $ana_type field
        :type data: dict
        :param id_: specify a UUID to use for the new record, instead of
                    automatically generated
        :type id_: `uuid.UUID`
        :param owner: owner of a new deposit (will get all permissions)
        :type owner: `invenio_accounts.models.User`

        :warn: inside an authenticated user session, the owner is
               automatically set to current_user

        :return: newly created deposit
        :rtype: `CAPDeposit`

        Process:
        * fill deposit metadata based on given data
        * initialize the following internal fields (underscore prefixed):
            _experiment: 'experiment_of_given_schema'
            _deposit: {
                'id': pid_value,
                'status': 'draft',
                'owners': [owner_id],
                'created_by': owner_id
            }
            _access: {
                'deposit-admin': {
                    'roles': [],
                    'users': [owner.id]
                },
                'deposit-update': {
                    'roles': [],
                    'users': [owner.id]
                },
                'deposit-read': {
                    'roles': [],
                    'users': [owner.id]
                }
            }
        * validate metadata against given schema (defined by $schema|$ana_type)
        * create RecordMetadata instance
        * create bucket for storing deposit files
        * set owner permissions in the db
        * index deposit in elasticsearch
        """
        if current_user and current_user.is_authenticated:
            owner = current_user

        with db.session.begin_nested():
            uuid_ = id_ or uuid.uuid4()

            data = cls._preprocess_create_data(data, uuid_, owner)

            # create RecordMetadata instance
            deposit = Record.create(data,
                                    id_=uuid_,
                                    validator=NoRequiredValidator)
            deposit.__class__ = cls

            # create files bucket
            bucket = Bucket.create()
            RecordsBuckets.create(record=deposit.model, bucket=bucket)
            # give owner permissions to the deposit
            if owner:
                for permission in DEPOSIT_ACTIONS:
                    db.session.add(
                        ActionUsers.allow(DEPOSIT_ACTIONS_NEEDS(
                            deposit.id)[permission],
                                          user=owner))

                    db.session.flush()

            return deposit
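
A hypothetical usage sketch (the '$ana_type' value below is made up; inside a request with an authenticated session, the owner defaults to current_user):

deposit = CAPDeposit.create({'$ana_type': 'my-analysis'})
db.session.commit()
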
Example #22
    def newversion(self, pid=None):
        """Create a new version deposit."""
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required when creating a new
                # version outside of a request context
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                ###
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                if 'extra_formats' in latest_record['_buckets']:
                    deposit['_buckets']['extra_formats'] = \
                        str(extra_formats_snapshot.id)
                    RecordsBuckets.create(
                        record=deposit.model, bucket=extra_formats_snapshot)
                deposit.commit()
        return self
Example #23
 def _resolve_bucket(cls, deposit, record):
     """Build bucket."""
     bucket = Bucket.create(location=Location.get_by_name('videos'))
     deposit['_buckets'] = {'deposit': str(bucket.id)}
     RecordsBuckets.create(record=deposit.model, bucket=bucket)
     record['_buckets'] = deepcopy(deposit['_buckets'])
Example #24
def prepare_edit_item():
    """Prepare_edit_item.

    Host the api which provide 2 service:
        Create new activity for editing flow
        Check permission: check if user is owner/admin/shared user
    request:
        header: Content type must be json
        data:
            pid_value: pid_value
    return: The result json:
        code: status code,
        msg: meassage result,
        data: url redirect
    """
    def _get_workflow_by_item_type_id(item_type_name_id, item_type_id):
        """Get workflow settings by item type id."""
        workflow = WorkFlow.query.filter_by(itemtype_id=item_type_id).first()
        if not workflow:
            item_type_list = ItemTypes.get_by_name_id(item_type_name_id)
            id_list = [x.id for x in item_type_list]
            workflow = (WorkFlow.query.filter(
                WorkFlow.itemtype_id.in_(id_list)).order_by(
                    WorkFlow.itemtype_id.desc()).order_by(
                        WorkFlow.flow_id.asc()).first())
        return workflow

    if request.headers['Content-Type'] != 'application/json':
        # Reject requests whose Content-Type is not JSON.
        return jsonify(code=-1, msg=_('Header Error'))
    post_activity = request.get_json()
    pid_value = post_activity.get('pid_value')

    if pid_value:
        try:
            record = WekoRecord.get_record_by_pid(pid_value)
            owner = str(record.get('owner'))
            shared_id = str(record.get('weko_shared_id'))
            user_id = str(get_current_user())
            is_admin = get_user_roles()
            activity = WorkActivity()

            pid_object = PersistentIdentifier.get('recid', pid_value)

            latest_pid = PIDVersioning(child=pid_object).last_child

            # check user's permission
            if user_id != owner and not is_admin[0] and user_id != shared_id:
                return jsonify(
                    code=-1, msg=_(r"You are not allowed to edit this item."))
            lists = ItemTypes.get_latest()
            if not lists:
                return jsonify(code=-1,
                               msg=_(r"You do not even have an Itemtype."))
            item_type_id = record.get('item_type_id')
            item_type = ItemTypes.get_by_id(item_type_id)
            if not item_type:
                return jsonify(code=-1, msg=_(r"This itemtype isn't found."))

            # check whether the item is being edited
            item_id = latest_pid.object_uuid
            workflow_activity = activity.get_workflow_activity_by_item_id(
                item_id)
            if not workflow_activity:
                # get the workflow of the first record, attached to version x.1
                workflow_activity = activity.get_workflow_activity_by_item_id(
                    pid_object.object_uuid)
                # if no workflow is found for the item, fall back to the
                # default settings of the item type the item belongs to
            else:
                # show an error when the action status is Begin or Doing
                if workflow_activity.action_status == \
                    ActionStatusPolicy.ACTION_BEGIN \
                    or workflow_activity.action_status == \
                        ActionStatusPolicy.ACTION_DOING:
                    return jsonify(code=-1,
                                   msg=_(r"The workflow is being edited."))

            # prepare params for new workflow activity
            if workflow_activity:
                post_activity['workflow_id'] = workflow_activity.workflow_id
                post_activity['flow_id'] = workflow_activity.flow_id
            else:
                workflow = _get_workflow_by_item_type_id(
                    item_type.name_id, item_type_id)
                if not workflow:
                    return jsonify(code=-1,
                                   msg=_('Workflow setting does not exist.'))
                post_activity['workflow_id'] = workflow.id
                post_activity['flow_id'] = workflow.flow_id
            post_activity['itemtype_id'] = item_type_id
            getargs = request.args
            community = getargs.get('community', None)

            # Create a new version of a record.
            record = WekoDeposit.get_record(item_id)
            if not record:
                return jsonify(code=-1, msg=_('Record does not exist.'))

            deposit = WekoDeposit(record, record.model)
            draft_record = deposit.newversion(pid_object)

            if not draft_record:
                return jsonify(code=-1, msg=_('An error has occurred.'))

            # Create snapshot bucket for draft record
            from invenio_records_files.models import RecordsBuckets
            try:
                with db.session.begin_nested():
                    from weko_workflow.utils import delete_bucket
                    draft_deposit = WekoDeposit(draft_record,
                                                draft_record.model)
                    snapshot = record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    draft_deposit['_buckets'] = {'deposit': str(snapshot.id)}
                    draft_record_bucket = RecordsBuckets.create(
                        record=draft_record.model, bucket=snapshot)

                    # Remove duplicated buckets
                    draft_record_buckets = RecordsBuckets.query.filter_by(
                        record_id=draft_record.model.id).all()
                    for record_bucket in draft_record_buckets:
                        if record_bucket != draft_record_bucket:
                            delete_bucket_id = record_bucket.bucket_id
                            RecordsBuckets.query.filter_by(
                                bucket_id=delete_bucket_id).delete()
                            delete_bucket(delete_bucket_id)
                    draft_deposit.commit()
            except Exception as ex:
                db.session.rollback()
                current_app.logger.exception(str(ex))
                return jsonify(code=-1, msg=_('An error has occurred.'))

            # Create a new workflow activity.
            rtn = activity.init_activity(post_activity, community,
                                         draft_record.model.id)

            if rtn:
                # GOTO: TEMPORARY EDIT MODE FOR IDENTIFIER
                identifier_actionid = get_actionid('identifier_grant')
                if workflow_activity:
                    identifier = activity.get_action_identifier_grant(
                        workflow_activity.activity_id, identifier_actionid)
                else:
                    identifier = activity.get_action_identifier_grant(
                        '', identifier_actionid)

                if identifier:
                    if identifier.get('action_identifier_select') > \
                            IDENTIFIER_GRANT_DOI:
                        identifier['action_identifier_select'] = \
                            IDENTIFIER_GRANT_CAN_WITHDRAW
                    elif identifier.get('action_identifier_select') == \
                            IDENTIFIER_GRANT_IS_WITHDRAWING:
                        identifier['action_identifier_select'] = \
                            IDENTIFIER_GRANT_WITHDRAWN
                    activity.create_or_update_action_identifier(
                        rtn.activity_id, identifier_actionid, identifier)

                mail_list = FeedbackMailList.get_mail_list_by_item_id(
                    item_id=pid_object.object_uuid)
                if mail_list:
                    activity.create_or_update_action_feedbackmail(
                        activity_id=rtn.activity_id,
                        action_id=ITEM_REGISTRATION_ACTION_ID,
                        feedback_maillist=mail_list)

                if community:
                    comm = GetCommunity.get_community_by_id(community)
                    url_redirect = url_for('weko_workflow.display_activity',
                                           activity_id=rtn.activity_id,
                                           community=comm.id)
                else:
                    url_redirect = url_for('weko_workflow.display_activity',
                                           activity_id=rtn.activity_id)
                return jsonify(code=0,
                               msg='success',
                               data={'redirect': url_redirect})

        except Exception as e:
            current_app.logger.error('Unexpected error: %s', str(e))
    return jsonify(code=-1, msg=_('An error has occurred.'))
Example #25
    def newversion(self, pid=None):
        """Create a new version deposit."""
        if not self.is_published():
            raise PIDInvalidAction()

        # Check that there is not a newer draft version for this record
        pid, record = self.fetch_published()
        pv = PIDVersioning(child=pid)
        if (not pv.draft_child and
                is_doi_locally_managed(record['doi'])):
            with db.session.begin_nested():

                # Get copy of the latest record
                latest_record = ZenodoRecord.get_record(
                    pv.last_child.object_uuid)
                data = latest_record.dumps()

                # Get the communities from the last deposit
                # and push those to the new version
                latest_depid = PersistentIdentifier.get(
                    'depid', data['_deposit']['id'])
                latest_deposit = ZenodoDeposit.get_record(
                    latest_depid.object_uuid)
                last_communities = latest_deposit.get('communities', [])

                owners = data['_deposit']['owners']

                # TODO: Check other data that may need to be removed
                keys_to_remove = (
                    '_deposit', 'doi', '_oai', '_files', '_buckets', '$schema')
                for k in keys_to_remove:
                    data.pop(k, None)

                # NOTE: We call the superclass `create()` method, because we
                # don't want a new empty bucket, but an unlocked snapshot of
                # the old record's bucket.
                deposit = (super(ZenodoDeposit, self).create(data))
                # Injecting owners is required when creating a new
                # version outside of a request context
                deposit['_deposit']['owners'] = owners
                if last_communities:
                    deposit['communities'] = last_communities

                ###
                conceptrecid = PersistentIdentifier.get(
                    'recid', data['conceptrecid'])
                recid = PersistentIdentifier.get(
                    'recid', str(data['recid']))
                depid = PersistentIdentifier.get(
                    'depid', str(data['_deposit']['id']))
                PIDVersioning(parent=conceptrecid).insert_draft_child(
                    child=recid)
                RecordDraft.link(recid, depid)

                # Pre-fill the Zenodo DOI to prevent the user from changing it
                # to a custom DOI.
                deposit['doi'] = doi_generator(recid.pid_value)

                pv = PIDVersioning(child=pid)
                index_siblings(pv.draft_child, neighbors_eager=True,
                               with_deposits=True)

                with db.session.begin_nested():
                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                # FIXME: `snapshot.id` might not be present because we need to
                # commit first to the DB.
                # db.session.commit()
                deposit['_buckets'] = {'deposit': str(snapshot.id)}
                RecordsBuckets.create(record=deposit.model, bucket=snapshot)
                deposit.commit()
        return self
Example #26
def datasets(skip_files):
    """Load demo datasets records."""
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter

    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    data = pkg_resources.resource_filename('cernopendata',
                                           'modules/fixtures/data/datasets')
    datasets_json = glob.glob(os.path.join(data, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for filename in datasets_json:

        click.echo('Loading datasets from {0} ...'.format(filename))

        with open(filename, 'rb') as source:
            for data in json.load(source):
                files = data.pop('files', [])

                id = uuid.uuid4()
                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if data.get('doi', None):
                    cernopendata_datasetid_minter(id, data)
                else:
                    cernopendata_recid_minter(id, data)
                data['$schema'] = schema
                record = Record.create(data, id_=id)

                bucket = Bucket.create()
                RecordsBuckets.create(
                    record=record.model, bucket=bucket)

                for file in files:
                    if skip_files:
                        break
                    assert 'uri' in file
                    assert 'size' in file
                    assert 'checksum' in file

                    f = FileInstance.create()
                    filename = file.get("uri").split('/')[-1]
                    f.set_uri(file.get("uri"), file.get(
                        "size"), file.get("checksum"))

                    ObjectVersion.create(
                        bucket,
                        filename,
                        _file_id=f.id
                    )
                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
Example #27
def files(temp, source):
    """Demo files for testing.

    .. note::

        These files are *only* for testing.
    """
    click.echo('Loading files; this may take several minutes.')
    if not source:
        source = pkg_resources.resource_filename('cds.modules.fixtures',
                                                 'data/files.tar.gz')

    files = _handle_source(source, temp)

    d = current_app.config['DATADIR']
    if not exists(d):
        makedirs(d)

    # Clear data
    ObjectVersion.query.delete()
    Bucket.query.delete()
    FileInstance.query.delete()
    Location.query.delete()
    db.session.commit()

    # Create location
    loc = Location(name='local', uri=d, default=True)
    # The new Location must be added to the session before it is committed.
    db.session.add(loc)
    db.session.commit()

    # Record indexer
    indexer = RecordIndexer()
    for f in files:
        with open(join(source, f), 'rb') as fp:
            # Create bucket
            bucket = Bucket.create(loc)

            # The filename
            file_name = basename(f)

            # Create object version
            ObjectVersion.create(bucket, file_name, stream=fp)

            # Attach to dummy records
            rec_uuid = uuid.uuid4()
            record = {
                '_access': {
                    'read': ['*****@*****.**', 'it-dep']
                },
                'dummy': True,
                'files': [{
                    'uri': '/api/files/{0}/{1}'.format(
                        str(bucket.id), file_name),
                    'filename': file_name,
                    'bucket': str(bucket.id),
                    'local': True,
                }]
            }

            # Create PID
            current_pidstore.minters['recid'](rec_uuid, record)

            # Create record
            record = FileRecord.create(record, id_=rec_uuid)

            # Index record
            indexer.index(record)

            # Create records' bucket
            RecordsBuckets.create(record=record.model, bucket=bucket)
    db.session.commit()
    click.echo('DONE :)')
Example #28
File: api.py Project: mhaya/weko
    def newversion(self, pid=None):
        """Create a new version deposit."""
        deposit = None
        try:
            if not self.is_published():
                raise PIDInvalidAction()

            # Check that there is not a newer draft version for this record
            # and this is the latest version
            pv = PIDVersioning(child=pid)
            if pv.exists and not pv.draft_child and pid == pv.last_child:
                last_pid = pv.last_child
                # Get copy of the latest record
                latest_record = WekoDeposit.get_record(last_pid.object_uuid)
                if latest_record is not None:
                    data = latest_record.dumps()

                    owners = data['_deposit']['owners']
                    keys_to_remove = ('_deposit', 'doi', '_oai', '_files',
                                      '_buckets', '$schema')
                    for k in keys_to_remove:
                        data.pop(k, None)

                    # NOTE: We call the superclass `create()` method, because we
                    # don't want a new empty bucket, but an unlocked snapshot of
                    # the old record's bucket.
                    deposit = super(WekoDeposit, self).create(data)
                    # Injecting owners is required when creating a new
                    # version outside of a request context
                    deposit['_deposit']['owners'] = owners

                    recid = PersistentIdentifier.get(
                        'recid', str(data['_deposit']['id']))
                    depid = PersistentIdentifier.get(
                        'depid', str(data['_deposit']['id']))
                    PIDVersioning(parent=pv.parent).insert_draft_child(
                        child=recid)
                    RecordDraft.link(recid, depid)

                    # Create snapshot from the record's bucket and update data
                    snapshot = latest_record.files.bucket.snapshot(lock=False)
                    snapshot.locked = False
                    deposit['_buckets'] = {'deposit': str(snapshot.id)}
                    RecordsBuckets.create(record=deposit.model,
                                          bucket=snapshot)
                    if 'extra_formats' in latest_record['_buckets']:
                        extra_formats_snapshot = \
                            latest_record.extra_formats.bucket.snapshot(
                                lock=False)
                        deposit['_buckets']['extra_formats'] = \
                            str(extra_formats_snapshot.id)
                        RecordsBuckets.create(record=deposit.model,
                                              bucket=extra_formats_snapshot)
                    index = {
                        'index': self.get('path', []),
                        'actions': 'private'
                        if self.get('publish_status', '1') == '1'
                        else 'publish',
                    }
                    if 'activity_info' in session:
                        del session['activity_info']
                    item_metadata = ItemsMetadata.get_record(
                        last_pid.object_uuid).dumps()
                    args = [index, item_metadata]
                    deposit.update(*args)
                    deposit.commit()
            return deposit
        except SQLAlchemyError as ex:
            current_app.logger.debug(ex)
            db.session.rollback()
            return None
Example #29
def test_RecordSIP_create(db, mocker):
    """Test create method from the API class RecordSIP."""
    # we setup a file storage
    tmppath = tempfile.mkdtemp()
    db.session.add(Location(name='default', uri=tmppath, default=True))
    # setup metadata
    mtype = SIPMetadataType(title='JSON Test',
                            name='json-test',
                            format='json',
                            schema='url://to/schema')
    db.session.add(mtype)
    db.session.commit()
    # first we create a record
    recid = uuid.uuid4()
    pid = PersistentIdentifier.create('recid',
                                      '1337',
                                      object_type='rec',
                                      object_uuid=recid,
                                      status=PIDStatus.REGISTERED)
    mocker.patch('invenio_records.api.RecordBase.validate',
                 return_value=True,
                 autospec=True)
    record = Record.create(
        {
            'title': 'record test',
            '$schema': 'url://to/schema'
        }, recid)
    # we add a file to the record
    bucket = Bucket.create()
    content = b'Test file\n'
    RecordsBuckets.create(record=record.model, bucket=bucket)
    record.files['test.txt'] = BytesIO(content)
    db.session.commit()
    # Let's create a SIP
    user = create_test_user('*****@*****.**')
    agent = {'email': '*****@*****.**', 'ip_address': '1.1.1.1'}
    rsip = RecordSIP.create(pid, record, True, user_id=user.id, agent=agent)
    db.session.commit()
    # test!
    assert RecordSIP_.query.count() == 1
    assert SIP_.query.count() == 1
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 1
    assert len(rsip.sip.files) == 1
    assert len(rsip.sip.metadata) == 1
    metadata = rsip.sip.metadata[0]
    assert metadata.type.format == 'json'
    assert '"title": "record test"' in metadata.content
    assert rsip.sip.archivable is True
    # we try with no files
    rsip = RecordSIP.create(pid,
                            record,
                            True,
                            create_sip_files=False,
                            user_id=user.id,
                            agent=agent)
    assert SIPFile.query.count() == 1
    assert SIPMetadata.query.count() == 2
    assert len(rsip.sip.files) == 0
    assert len(rsip.sip.metadata) == 1

    # try with specific SIP metadata type
    mtype = SIPMetadataType(title='JSON Test 2',
                            name='json-test-2',
                            format='json',
                            schema=None)  # no schema
    db.session.add(mtype)
    db.session.commit()

    rsip = RecordSIP.create(pid,
                            record,
                            True,
                            create_sip_files=False,
                            user_id=user.id,
                            agent=agent,
                            sip_metadata_type='json-test-2')
    assert SIPMetadata.query.count() == 3
    assert len(rsip.sip.metadata) == 1
    assert rsip.sip.metadata[0].type.id == mtype.id

    # finalization
    rmtree(tmppath)