def data_policies():
    """Load demo Data Policy records.

    Every ``*.json`` file found in the ``modules/fixtures/data`` resource
    directory of the ``cernopendata`` package is parsed and iterated. For
    each item a fresh UUID is generated and passed, together with the item,
    to ``cernopendata_recid_minter``; a record is then created from the
    item, the database session is committed, the record is sent to the
    indexer and the session is expunged.
    """
    from invenio_db import db
    from invenio_records import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    # Kept under its own name so the per-record loop variable below does not
    # overwrite the fixtures directory path.
    data_dir = pkg_resources.resource_filename('cernopendata',
                                               'modules/fixtures/data')

    for filename in glob.glob(os.path.join(data_dir, '*.json')):
        with open(filename, 'rb') as source:
            for data in json.load(source):
                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, data)
                # The schema must be part of the data handed to
                # Record.create; assigning it to the returned record
                # afterwards only changes the in-memory object, because the
                # record is never committed again before the session commit.
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)
                db.session.commit()
                indexer.index(record)
                # Detach loaded objects before moving on to the next record.
                db.session.expunge_all()
def datasets(skip_files):
    """Load demo datasets records.

    Iterates over the ``*.json`` fixtures in the
    ``modules/fixtures/data/datasets`` resource directory of the
    ``cernopendata`` package. For every item the ``files`` entry is removed
    from the metadata, a PID minter is called, a record plus a bucket are
    created and linked, and the record is committed and indexed.

    :param skip_files: when truthy, the file entries of a record are not
        registered in its bucket (the bucket itself is still created).
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    record_indexer = RecordIndexer()
    jsonschemas = current_app.extensions['invenio-jsonschemas']
    schema_url = jsonschemas.path_to_url('records/datasets-v1.0.0.json')
    fixtures_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/datasets')
    fixture_paths = glob.glob(os.path.join(fixtures_dir, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for fixture_path in fixture_paths:
        click.echo('Loading datasets from {0} ...'.format(fixture_path))
        with open(fixture_path, 'rb') as source:
            for metadata in json.load(source):
                file_entries = metadata.pop('files', [])
                record_uuid = uuid.uuid4()

                # (TOFIX) Remove if statement in production
                # as every dataset record should have a doi
                if metadata.get('doi', None):
                    mint = cernopendata_datasetid_minter
                else:
                    mint = cernopendata_recid_minter
                mint(record_uuid, metadata)

                metadata['$schema'] = schema_url
                record = Record.create(metadata, id_=record_uuid)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for entry in file_entries:
                    if skip_files:
                        break
                    for required_key in ('uri', 'size', 'checksum'):
                        assert required_key in entry

                    file_instance = FileInstance.create()
                    # The object key is the last path segment of the URI.
                    object_key = entry.get("uri").split('/')[-1]
                    file_instance.set_uri(entry.get("uri"),
                                          entry.get("size"),
                                          entry.get("checksum"))
                    ObjectVersion.create(bucket, object_key,
                                         _file_id=file_instance.id)

                db.session.commit()
                record_indexer.index(record)
                db.session.expunge_all()
def data_policies(skip_files):
    """Load demo Data Policy records.

    Each ``*.json`` fixture in the ``modules/fixtures/data`` resource
    directory of the ``cernopendata`` package is read; for every item in it
    the ``files`` entry is popped, ``cernopendata_recid_minter`` is called,
    and a record with a linked bucket is created, committed and indexed.

    :param skip_files: when truthy, no file entries are registered in the
        record's bucket.
    """
    from invenio_db import db
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets
    from invenio_records_files.api import Record
    from invenio_records.models import RecordMetadata

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/data-policies-v1.0.0.json'
    )
    policies_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data')
    policy_paths = glob.glob(os.path.join(policies_dir, '*.json'))

    for policy_path in policy_paths:
        click.echo('Loading data-policies from {0} ...'.format(policy_path))
        with open(policy_path, 'rb') as source:
            for policy in json.load(source):
                attachments = policy.pop('files', [])
                policy_uuid = uuid.uuid4()
                cernopendata_recid_minter(policy_uuid, policy)
                policy['$schema'] = schema

                record = Record.create(policy, id_=policy_uuid)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for attachment in attachments:
                    if skip_files:
                        break
                    assert 'uri' in attachment
                    assert 'size' in attachment
                    assert 'checksum' in attachment

                    stored_file = FileInstance.create()
                    # Last path segment of the URI is used as the object key.
                    key = attachment.get("uri").rsplit('/', 1)[-1]
                    stored_file.set_uri(
                        attachment.get("uri"),
                        attachment.get("size"),
                        attachment.get("checksum"),
                    )
                    ObjectVersion.create(bucket, key, _file_id=stored_file.id)

                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()
def create_record(schema, data, files, skip_files):
    """Create a new record from ``data`` and return it.

    A fresh UUID is generated and handed to ``cernopendata_recid_minter``
    together with ``data``; ``schema`` is stored under ``data['$schema']``
    before the record is created. Unless ``skip_files`` is truthy the record
    is created with a bucket and ``handle_record_files`` is called for it.
    """
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema

    wants_bucket = not skip_files
    record = Record.create(data, id_=record_uuid, with_bucket=wants_bucket)

    # Nothing more to do when file handling is disabled.
    if skip_files:
        return record

    handle_record_files(data, record.bucket, files, skip_files)
    return record
def create_record(schema, data, files, skip_files):
    """Create a new record from ``data`` and return it.

    Calls ``cernopendata_recid_minter`` with a fresh UUID, stores ``schema``
    under ``data['$schema']`` and creates the record. Unless ``skip_files``
    is truthy, a new bucket is created, passed to ``handle_record_files``
    and then linked to the record.
    """
    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)

    # Without files there is no bucket to create or link.
    if skip_files:
        return record

    bucket = Bucket.create()
    handle_record_files(data, bucket, files, skip_files)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def create_record(schema, data, files, skip_files):
    """Build and return a record for the given metadata.

    :param schema: value stored under ``data['$schema']`` before creation.
    :param data: record metadata; modified in place.
    :param files: passed through to ``handle_record_files``.
    :param skip_files: when truthy, no bucket is created and
        ``handle_record_files`` is not called.
    :returns: the object returned by ``Record.create``.
    """
    new_uuid = uuid.uuid4()
    cernopendata_recid_minter(new_uuid, data)
    data['$schema'] = schema
    created = Record.create(data, id_=new_uuid)

    attach_files = not skip_files
    if attach_files:
        files_bucket = Bucket.create()
        handle_record_files(data, files_bucket, files, skip_files)
        # The bucket is linked to the record only after the files
        # have been handled.
        RecordsBuckets.create(record=created.model, bucket=files_bucket)

    return created
def software():
    """Load demo software records.

    Every ``*.json`` file in the ``modules/fixtures/data/software`` resource
    directory of the ``cernopendata`` package is parsed and iterated. For
    each item the ``files`` entry is removed from the metadata,
    ``cernopendata_recid_minter`` is called with a fresh UUID, a record and
    a bucket are created and linked, each file entry is registered in the
    bucket, and the record is committed and indexed.

    :raises AssertionError: if a file entry lacks ``uri``, ``size`` or
        ``checksum``.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/software-v1.0.0.json')
    # Kept under its own name so the per-record loop variable below does not
    # overwrite the fixtures directory path.
    data_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/software')

    for filename in glob.glob(os.path.join(data_dir, '*.json')):
        with open(filename, 'rb') as source:
            for data in json.load(source):
                # Records without a `files` key (or with a null one) simply
                # have no files; iterating over None would raise TypeError.
                files = data.pop('files', None) or []
                record_uuid = uuid.uuid4()
                cernopendata_recid_minter(record_uuid, data)
                # The schema must be part of the data handed to
                # Record.create; assigning it to the returned record
                # afterwards only changes the in-memory object, because the
                # record is never committed again before the session commit.
                data['$schema'] = schema
                record = Record.create(data, id_=record_uuid)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file_entry in files:
                    assert 'uri' in file_entry
                    assert 'size' in file_entry
                    assert 'checksum' in file_entry

                    file_instance = FileInstance.create()
                    uri = file_entry.get("uri")
                    # The object key is the last path segment of the URI.
                    key = uri.split('/')[-1]
                    file_instance.set_uri(uri,
                                          file_entry.get("size"),
                                          file_entry.get("checksum"))
                    ObjectVersion.create(bucket, key,
                                         _file_id=file_instance.id)

                db.session.commit()
                indexer.index(record)
                # Detach loaded objects before moving on to the next record.
                db.session.expunge_all()
def create_record(schema, data, files, skip_files):
    """Create a new record from ``data`` and return it.

    A bucket is created first and, unless ``skip_files`` is truthy, every
    entry of ``files`` is registered in it. A file that fails to load is
    reported with ``click.echo`` and skipped; it does not abort the record.
    Afterwards ``cernopendata_recid_minter`` is called with a fresh UUID,
    the record is created and the bucket is linked to it.

    :param schema: value stored under ``data['$schema']``.
    :param data: record metadata; modified in place.
    :param files: iterable of dicts with ``uri``, ``size`` and ``checksum``
        keys. Each successfully loaded entry is updated in place with
        ``bucket``, ``checksum``, ``key`` and ``version_id``. ``None`` is
        treated as "no files".
    :param skip_files: when truthy, no file is registered in the bucket.
    :returns: the object returned by ``Record.create``.
    :raises AssertionError: if a file entry lacks one of the required keys.
    """
    bucket = Bucket.create()

    if not skip_files:
        for file_entry in files or []:
            assert 'uri' in file_entry
            assert 'size' in file_entry
            assert 'checksum' in file_entry

            uri = file_entry.get("uri")
            # Bound before the try block so the error message below always
            # has a label for this file, even when the failure happens
            # before the object key could be derived from the URI.
            filename = uri
            try:
                file_instance = FileInstance.create()
                filename = uri.split('/')[-1]
                file_instance.set_uri(uri,
                                      file_entry.get("size"),
                                      file_entry.get("checksum"))
                obj = ObjectVersion.create(bucket, filename,
                                           _file_id=file_instance.id)
                file_entry.update({
                    'bucket': str(obj.bucket_id),
                    'checksum': obj.file.checksum,
                    'key': obj.key,
                    'version_id': str(obj.version_id),
                })
            except Exception as e:
                # Deliberate best effort: report the failing file and carry
                # on with the remaining ones.
                click.echo('Recid {0} file {1} could not be loaded due '
                           'to {2}.'.format(data.get('recid'), filename,
                                            str(e)))

    record_uuid = uuid.uuid4()
    cernopendata_recid_minter(record_uuid, data)
    # Set the schema on the data handed to Record.create rather than on the
    # returned record, so it is part of what gets created.
    # NOTE(review): this may enable schema validation inside Record.create
    # that the old ordering bypassed - confirm the fixtures conform.
    data['$schema'] = schema
    record = Record.create(data, id_=record_uuid)
    RecordsBuckets.create(record=record.model, bucket=bucket)
    return record
def datasets(skip_files):
    """Load demo datasets records.

    Reads the ``*.json`` fixtures from the ``modules/fixtures/data/datasets``
    resource directory of the ``cernopendata`` package. For each item, the
    ``files`` entry is popped, one of two minters is called depending on
    whether the item has a truthy ``doi``, a record and a linked bucket are
    created, and the record is committed and indexed.

    :param skip_files: when truthy, file entries are not registered in the
        record's bucket.
    """
    from invenio_db import db
    from invenio_records_files.api import Record
    from invenio_indexer.api import RecordIndexer
    from cernopendata.modules.records.minters.recid import \
        cernopendata_recid_minter
    from cernopendata.modules.records.minters.datasetid import \
        cernopendata_datasetid_minter
    from invenio_files_rest.models import \
        Bucket, FileInstance, ObjectVersion
    from invenio_records_files.models import RecordsBuckets

    indexer = RecordIndexer()
    schema = current_app.extensions['invenio-jsonschemas'].path_to_url(
        'records/datasets-v1.0.0.json'
    )
    datasets_dir = pkg_resources.resource_filename(
        'cernopendata', 'modules/fixtures/data/datasets')
    json_paths = glob.glob(os.path.join(datasets_dir, '*.json'))

    # FIXME: change the treatment of `files` according to `records` fixtures.
    for json_path in json_paths:
        click.echo('Loading datasets from {0} ...'.format(json_path))
        with open(json_path, 'rb') as source:
            for dataset in json.load(source):
                dataset_files = dataset.pop('files', [])
                dataset_uuid = uuid.uuid4()

                # (TOFIX) Remove the conditional in production
                # as every dataset record should have a doi
                minter = (
                    cernopendata_datasetid_minter
                    if dataset.get('doi', None)
                    else cernopendata_recid_minter
                )
                minter(dataset_uuid, dataset)

                dataset['$schema'] = schema
                record = Record.create(dataset, id_=dataset_uuid)
                bucket = Bucket.create()
                RecordsBuckets.create(record=record.model, bucket=bucket)

                for file_info in dataset_files:
                    if skip_files:
                        break
                    assert 'uri' in file_info
                    assert 'size' in file_info
                    assert 'checksum' in file_info

                    instance = FileInstance.create()
                    # Object key: whatever follows the last '/' of the URI.
                    key = file_info.get("uri").rsplit('/', 1)[-1]
                    instance.set_uri(
                        file_info.get("uri"),
                        file_info.get("size"),
                        file_info.get("checksum"),
                    )
                    ObjectVersion.create(bucket, key, _file_id=instance.id)

                db.session.commit()
                indexer.index(record)
                db.session.expunge_all()