def index_after_commit(sender, changes):
    """Mirror committed record changes into Elasticsearch.

    The indexing is done here and not in an ``after_record_commit`` receiver
    from Invenio-Records because, despite its name, when that signal fires we
    cannot yet be sure that the record has really been committed to the DB.

    Args:
        sender: the object that sent the signal (not used).
        changes: iterable of ``(model_instance, change)`` pairs; ``change``
            is compared against ``'insert'`` and ``'update'``.
    """
    indexer = InspireRecordIndexer()

    for model_instance, change in changes:
        # Anything that is not record metadata is ignored.
        if not isinstance(model_instance, RecordMetadata):
            continue

        is_upsert = change in ('insert', 'update')
        if is_upsert and not model_instance.json.get("deleted"):
            # Prefer the enhanced version of the record when one is attached
            # to the model instance.
            payload = (
                model_instance._enhanced_record
                if hasattr(model_instance, '_enhanced_record')
                else model_instance.json
            )
            indexer.index(InspireRecord(payload, model_instance))
        else:
            stale_record = InspireRecord(model_instance.json, model_instance)
            try:
                indexer.delete(stale_record)
            except NotFoundError:
                # The record was never in ES: only worth a debug message.
                LOGGER.debug('Record %s not found in ES',
                             model_instance.json.get("id"))

        # Schedule the reindexing of the citations affected by this change.
        index_modified_citations_from_record.delay(
            get_pid_type_from_schema(model_instance.json['$schema']),
            model_instance.json['control_number'],
            model_instance.version_id,
        )
def record_insert_or_replace(json, skip_files=False):
    """Store ``json`` as a record, replacing any record with the same PID.

    The record is looked up through the PID type derived from ``$schema``
    and the ``control_number``. When the PID exists its record is emptied,
    refilled with ``json`` and committed; otherwise a new record is created
    and a recid is minted for it.

    Args:
        json: the record data; must contain ``$schema`` and
            ``control_number``.
        skip_files: forwarded to ``record.update`` / ``InspireRecord.create``.

    Returns:
        The updated or newly created ``InspireRecord``.
    """
    def _apply_legacy_creation_date(target):
        # Keep the creation date carried by the data, when there is one.
        if json.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                json['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    # A record flagged as deleted is removed unless ``new_record`` points to
    # another record.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def record_insert_or_replace(json, skip_files=False):
    """Store ``json`` as a record, replacing any record with the same PID.

    The record is looked up through the PID type derived from ``$schema``
    and the ``control_number``. When the PID exists its record is emptied,
    refilled with ``json`` and committed; otherwise a new record is created
    and a recid is minted for it.

    Args:
        json: the record data; must contain ``$schema`` and
            ``control_number``.
        skip_files: forwarded to ``record.update`` / ``InspireRecord.create``.

    Returns:
        The updated or newly created ``InspireRecord``.
    """
    def _apply_legacy_creation_date(target):
        # Keep the creation date carried by the data, when there is one.
        if json.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                json['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    # A record flagged as deleted is removed unless ``new_record`` points to
    # another record.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def index_after_commit(sender, changes):
    """Mirror committed record changes into Elasticsearch.

    The indexing is done here and not in an ``after_record_commit`` receiver
    from Invenio-Records because, despite its name, when that signal fires we
    cannot yet be sure that the record has really been committed to the DB.

    Args:
        sender: the object that sent the signal (not used).
        changes: iterable of ``(model_instance, change)`` pairs; ``change``
            is compared against ``'insert'`` and ``'update'``.
    """
    indexer = RecordIndexer()

    for model_instance, change in changes:
        # Anything that is not record metadata is ignored.
        if not isinstance(model_instance, RecordMetadata):
            continue

        is_upsert = change in ('insert', 'update')
        if is_upsert and not model_instance.json.get("deleted"):
            # Prefer the enhanced version of the record when one is attached
            # to the model instance.
            payload = (
                model_instance._enhanced_record
                if hasattr(model_instance, '_enhanced_record')
                else model_instance.json
            )
            indexer.index(InspireRecord(payload, model_instance))
        else:
            stale_record = InspireRecord(model_instance.json, model_instance)
            try:
                indexer.delete(stale_record)
            except NotFoundError:
                # The record was never in ES: only worth a debug message.
                LOGGER.debug('Record %s not found in ES',
                             model_instance.json.get("id"))

        # Schedule the reindexing of the citations affected by this change.
        index_modified_citations_from_record.delay(
            get_pid_type_from_schema(model_instance.json['$schema']),
            model_instance.json['control_number'],
            model_instance.version_id,
        )
def merge_merged_records():
    """Redirect the PIDs of deleted records to the record that lists them.

    For every record returned by ``get_merged_records()``, each recid
    referenced in its ``deleted_records`` is resolved to a PID (created and
    registered if it does not exist yet) and that PID is redirected to the
    PID of the record itself. The session is committed once at the end.
    """
    for record in get_merged_records():
        target_pid = PersistentIdentifier.query.filter_by(
            object_uuid=record.id
        ).one()

        # Resolve every reference up front, before touching any PID.
        recids = [get_recid_from_ref(ref) for ref in record['deleted_records']]

        for recid in recids:
            lookup = PersistentIdentifier.query.filter_by(pid_value=str(recid))
            old_pid = lookup.one_or_none()

            if not old_pid:
                # No PID for the deleted record yet: create one of the same
                # type as the surviving record and register it.
                old_pid = PersistentIdentifier.create(
                    pid_type=get_pid_type_from_schema(record['$schema']),
                    pid_value=recid,
                    object_type='rec',
                )
                old_pid.register()
                db.session.add(old_pid)

            old_pid.redirect(target_pid)

    db.session.commit()
def merge_merged_records():
    """Redirect the PIDs of deleted records to the record that lists them.

    For every record returned by ``get_merged_records()``, each recid
    referenced in its ``deleted_records`` is resolved to a PID (created and
    registered if it does not exist yet) and that PID is redirected to the
    PID of the record itself. The session is committed once at the end.
    """
    for record in get_merged_records():
        target_pid = PersistentIdentifier.query.filter_by(
            object_uuid=record.id
        ).one()

        # Resolve every reference up front, before touching any PID.
        recids = [get_recid_from_ref(ref) for ref in record['deleted_records']]

        for recid in recids:
            lookup = PersistentIdentifier.query.filter_by(pid_value=str(recid))
            old_pid = lookup.one_or_none()

            if not old_pid:
                # No PID for the deleted record yet: create one of the same
                # type as the surviving record and register it.
                old_pid = PersistentIdentifier.create(
                    pid_type=get_pid_type_from_schema(record['$schema']),
                    pid_value=recid,
                    object_type='rec',
                )
                old_pid.register()
                db.session.add(old_pid)

            old_pid.redirect(target_pid)

    db.session.commit()
def _get_ref(self):
    """Return the absolute URL of this object, as used in a ``$ref``.

    The URL is built from the endpoint matching the record's ``$schema``
    and from its ``control_number``.
    """
    control_number = self.get('control_number')
    schema = self.get('$schema')
    endpoint = get_endpoint_from_pid_type(get_pid_type_from_schema(schema))

    path = u'/api/{endpoint}/{control_number}'.format(
        endpoint=endpoint,
        control_number=control_number,
    )
    return absolute_url(path)
def _get_ref(self):
    """Return the absolute URL of this object, as used in a ``$ref``.

    The URL is built from the endpoint matching the record's ``$schema``
    and from its ``control_number``.
    """
    control_number = self.get('control_number')
    schema = self.get('$schema')
    endpoint = get_endpoint_from_pid_type(get_pid_type_from_schema(schema))

    path = u'/api/{endpoint}/{control_number}'.format(
        endpoint=endpoint,
        control_number=control_number,
    )
    return absolute_url(path)
def create_or_update(cls, data, **kwargs):
    """Create a record, or update the one already registered for its PID.

    A record is looked up by the ``pid_type`` derived from ``$schema`` and
    by the ``control_number`` of ``data``. If one is registered it is
    emptied and refilled with ``data``; otherwise a new record is created.

    Keyword Args:
        files_src_records(List[InspireRecord]): if passed, the files for the
            documents and figures are first searched for in these records
            (the first one having the file in its files iterator wins)
            before being downloaded, for example to merge existing records.
        skip_files(bool): if ``True`` the files retrieval described above is
            skipped. When not passed, the value of the ``RECORDS_SKIP_FILES``
            configuration variable is used instead.

    Examples:
        >>> record = {
        ...     '$schema': 'hep.json',
        ... }
        >>> record = InspireRecord.create_or_update(record)
        >>> record.commit()
    """
    def _apply_legacy_creation_date(target):
        # Keep the creation date carried by the data, when there is one.
        if data.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                data['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(data['$schema'])
    control_number = data.get('control_number')

    # These two options are consumed here; whatever is left in ``kwargs`` is
    # forwarded to ``update`` / ``create``.
    files_src_records = kwargs.pop('files_src_records', [])
    configured_skip_files = current_app.config.get('RECORDS_SKIP_FILES')
    skip_files = kwargs.pop('skip_files', configured_skip_files)

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = super(InspireRecord, cls).get_record(existing_pid.object_uuid)
        record.clear()
        record.update(data, skip_files=skip_files, **kwargs)
        _apply_legacy_creation_date(record)
    except PIDDoesNotExistError:
        record = cls.create(data, skip_files=skip_files, **kwargs)
        _apply_legacy_creation_date(record)

    # A record flagged as deleted is removed unless ``new_record`` points to
    # another record.
    if data.get('deleted') and not get_recid_from_ref(data.get('new_record')):
        record.delete()

    if not skip_files:
        record.download_documents_and_figures(src_records=files_src_records)

    return record
def create_or_update(cls, data, **kwargs):
    """Create a record, or update the one already registered for its PID.

    A record is looked up by the ``pid_type`` derived from ``$schema`` and
    by the ``control_number`` of ``data``. If one is registered it is
    emptied and refilled with ``data``; otherwise a new record is created.

    Keyword Args:
        files_src_records(List[InspireRecord]): if passed, the files for the
            documents and figures are first searched for in these records
            (the first one having the file in its files iterator wins)
            before being downloaded, for example to merge existing records.
        skip_files(bool): if ``True`` the files retrieval described above is
            skipped. When not passed, the value of the ``RECORDS_SKIP_FILES``
            configuration variable is used instead.

    Examples:
        >>> record = {
        ...     '$schema': 'hep.json',
        ... }
        >>> record = InspireRecord.create_or_update(record)
        >>> record.commit()
    """
    def _apply_legacy_creation_date(target):
        # Keep the creation date carried by the data, when there is one.
        if data.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                data['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(data['$schema'])
    control_number = data.get('control_number')

    # These two options are consumed here; whatever is left in ``kwargs`` is
    # forwarded to ``update`` / ``create``.
    files_src_records = kwargs.pop('files_src_records', [])
    configured_skip_files = current_app.config.get('RECORDS_SKIP_FILES')
    skip_files = kwargs.pop('skip_files', configured_skip_files)

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = super(InspireRecord, cls).get_record(existing_pid.object_uuid)
        record.clear()
        record.update(data, skip_files=skip_files, **kwargs)
        _apply_legacy_creation_date(record)
    except PIDDoesNotExistError:
        record = cls.create(data, skip_files=skip_files, **kwargs)
        _apply_legacy_creation_date(record)

    # A record flagged as deleted is removed unless ``new_record`` points to
    # another record.
    if data.get('deleted') and not get_recid_from_ref(data.get('new_record')):
        record.delete()

    if not skip_files:
        record.download_documents_and_figures(src_records=files_src_records)

    return record
def _get_index_ref(self):
    """Return a compact reference for querying the index.

    The reference is the ``control_number`` immediately followed by the
    first three characters of the endpoint matching the record's schema.
    """
    control_number = self.get('control_number')
    schema = self.get('$schema')
    endpoint = get_endpoint_from_pid_type(get_pid_type_from_schema(schema))

    return "{pid_value}{shorten_endpoint}".format(
        pid_value=control_number,
        shorten_endpoint=endpoint[:3],
    )
def _get_index_ref(self):
    """Return a compact reference for querying the index.

    The reference is the ``control_number`` immediately followed by the
    first three characters of the endpoint matching the record's schema.
    """
    control_number = self.get('control_number')
    schema = self.get('$schema')
    endpoint = get_endpoint_from_pid_type(get_pid_type_from_schema(schema))

    return "{pid_value}{shorten_endpoint}".format(
        pid_value=control_number,
        shorten_endpoint=endpoint[:3],
    )
def _get_updated_record(obj):
    """Return the record that ``obj`` updates.

    The record is fetched by ``head_uuid`` when ``obj.extra_data`` has one;
    otherwise it is looked up from the approved match and the PID type of
    ``obj.data``.

    TODO: use only head_uuid once we have the merger.
    """
    extra_data = obj.extra_data

    if 'head_uuid' in extra_data:
        return InspireRecord.get_record(extra_data['head_uuid'])

    # No head yet: fall back to the match stored on the object.
    pid_type = get_pid_type_from_schema(obj.data['$schema'])
    approved_recid = obj.extra_data['matches']['approved']
    return get_db_record(pid_type, approved_recid)
def _get_updated_record(obj):
    """Return the record that ``obj`` updates.

    The record is fetched by ``head_uuid`` when ``obj.extra_data`` has one;
    otherwise it is looked up from the first entry of ``record_matches``
    and the PID type of ``obj.data``.

    TODO: use only head_uuid once we have the merger.
    """
    extra_data = obj.extra_data

    if 'head_uuid' in extra_data:
        return InspireRecord.get_record(extra_data['head_uuid'])

    # No head yet: fall back to the first match stored on the object.
    pid_type = get_pid_type_from_schema(obj.data['$schema'])
    first_match_recid = obj.extra_data['record_matches'][0]
    return get_db_record(pid_type, first_match_recid)
def record_insert_or_replace(json):
    """Store ``json`` as a record, replacing any record with the same PID.

    The identifier is read from ``control_number``, falling back to
    ``recid``. When it is missing or falsy nothing is done and ``None`` is
    returned.

    Args:
        json: the record data; ``$schema`` is required whenever an
            identifier is present.

    Returns:
        The updated or newly created ``InspireRecord``, or ``None``.
    """
    control_number = json.get('control_number', json.get('recid'))
    if not control_number:
        return None

    pid_type = get_pid_type_from_schema(json['$schema'])

    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None)
        # The new record needs its persistent identifier.
        inspire_recid_minter(str(record.id), json)

    # A record flagged as deleted is removed unless ``new_record`` points to
    # another record.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
def test_get_pid_type_from_schema():
    """The absolute URL of the ``hep`` schema maps to the ``lit`` PID type."""
    schema_url = 'http://localhost:5000/schemas/record/hep.json'

    assert 'lit' == get_pid_type_from_schema(schema_url)
def test_get_pid_from_schema_supports_relative_urls():
    """A relative ``authors`` schema URL maps to the ``aut`` PID type."""
    relative_schema_url = 'schemas/record/authors.json'

    assert 'aut' == get_pid_type_from_schema(relative_schema_url)
def get_endpoint_from_record(record):
    """Return the endpoint matching the ``$schema`` of ``record``.

    The schema is first mapped to a PID type, which is then mapped to the
    endpoint.
    """
    return get_endpoint_from_pid_type(
        get_pid_type_from_schema(record['$schema'])
    )
def test_get_pid_from_schema_supports_relative_urls():
    """A relative ``authors`` schema URL maps to the ``aut`` PID type."""
    relative_schema_url = 'schemas/record/authors.json'

    assert 'aut' == get_pid_type_from_schema(relative_schema_url)
def test_get_pid_type_from_schema():
    """The absolute URL of the ``hep`` schema maps to the ``lit`` PID type."""
    schema_url = 'http://localhost:5000/schemas/record/hep.json'

    assert 'lit' == get_pid_type_from_schema(schema_url)
def get_endpoint_from_record(record):
    """Return the endpoint matching the ``$schema`` of ``record``.

    The schema is first mapped to a PID type, which is then mapped to the
    endpoint.
    """
    return get_endpoint_from_pid_type(
        get_pid_type_from_schema(record['$schema'])
    )