Exemplo n.º 1
0
def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value, merged_uuid, deleted_uuid):
    """Hard-delete both records involved in a merge, plus their PIDs.

    Force-deletes the merged and the deleted record, removes the
    ``Redirect`` rows whose id equals the object UUID of the deleted PID,
    deletes both persistent identifiers and commits the session.
    """
    for record_uuid in (merged_uuid, deleted_uuid):
        InspireRecord.get_record(record_uuid)._delete(force=True)

    pid_of_merged = PersistentIdentifier.get(pid_type, merged_pid_value)
    pid_of_deleted = PersistentIdentifier.get(pid_type, deleted_pid_value)

    stale_redirects = Redirect.query.filter(
        Redirect.id == pid_of_deleted.object_uuid
    )
    stale_redirects.delete()

    for pid in (pid_of_merged, pid_of_deleted):
        db.session.delete(pid)
    db.session.commit()
Exemplo n.º 2
0
def _delete_merged_records(pid_type, merged_pid_value, deleted_pid_value,
                           merged_uuid, deleted_uuid):
    """Remove the two records of a merge and everything pointing at them.

    Both records are force-deleted first. Then both PIDs are looked up,
    the redirect keyed by the deleted PID's object UUID is dropped, the
    PIDs themselves are deleted and the session is committed.
    """
    merged_record = InspireRecord.get_record(merged_uuid)
    merged_record._delete(force=True)
    deleted_record = InspireRecord.get_record(deleted_uuid)
    deleted_record._delete(force=True)

    # Order matters: merged PID first, deleted PID last.
    pids = [
        PersistentIdentifier.get(pid_type, pid_value)
        for pid_value in (merged_pid_value, deleted_pid_value)
    ]

    redirect_id = pids[-1].object_uuid
    Redirect.query.filter(Redirect.id == redirect_id).delete()

    for pid in pids:
        db.session.delete(pid)

    db.session.commit()
Exemplo n.º 3
0
def test_update_authors_recid_method(small_app):
    """Check that ``update_authors_recid`` stores the given profile recid."""
    from inspirehep.modules.disambiguation.tasks import update_authors_recid

    expected_recid = "314159265"
    publication_id = str(PersistentIdentifier.get('lit', 4328).object_uuid)

    # The first author's signature is the one being re-assigned.
    first_author = InspireRecord.get_record(publication_id)['authors'][0]
    signature_uuid = first_author['uuid']

    update_authors_recid(publication_id, signature_uuid, expected_recid)

    reloaded = InspireRecord.get_record(publication_id)
    assert reloaded['authors'][0]['recid'] == expected_recid
Exemplo n.º 4
0
def sample_record(app):
    """Yield an indexed sample HEP record (control number 123), then clean up.

    After the consumer is done, the ``lit`` PID ``'123'`` and the record
    are deleted, the versioning manager's ``transaction_cls`` query is
    bulk-deleted and the session is committed.
    """
    created = _create_and_index_record({
        "$schema": "http://localhost:5000/schemas/records/hep.json",
        "control_number": 123,
        "titles": [
            {"title": "Supersymmetric gauge field theory and string theory"},
        ],
        "collections": [
            {"primary": "HEP"},
            {"primary": "THESIS"},
        ],
    })
    created_uuid = created.id

    yield created

    # Teardown: PID first, then the record, then the versioning rows.
    db.session.delete(PersistentIdentifier.get('lit', '123'))
    InspireRecord.get_record(created_uuid)._delete(force=True)
    versioning = current_app.extensions['invenio-db'].versioning_manager
    versioning.transaction_cls.query.delete()
    db.session.commit()
Exemplo n.º 5
0
def record_insert_or_replace(json, skip_files=False):
    """Insert a new record or replace the content of an existing one.

    The record is looked up through the PID built from the schema's PID
    type and ``json['control_number']``. If the PID does not exist, a new
    record is created and a recid is minted for it. A truthy
    ``legacy_creation_date`` (``%Y-%m-%d``) overrides the model's creation
    date. A record flagged ``deleted`` without a resolvable ``new_record``
    reference is deleted.

    Returns the inserted or replaced record.
    """
    def _apply_legacy_creation_date(target):
        # Keep the creation date of the legacy system when one is given.
        if json.get('legacy_creation_date'):
            target.model.created = datetime.strptime(
                json['legacy_creation_date'], '%Y-%m-%d')

    pid_type = get_pid_type_from_schema(json['$schema'])
    control_number = json['control_number']

    try:
        pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        _apply_legacy_creation_date(record)
        inspire_recid_minter(str(record.id), json)

    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
Exemplo n.º 6
0
 def actions():
     """Yield one forced-version index operation per loadable record.

     Iterates over ``uuids`` from the enclosing scope. A UUID whose record
     lookup raises ``NoResultFound`` is logged and skipped.
     """
     for uuid in uuids:
         try:
             record = InspireRecord.get_record(uuid)
             yield create_index_op(record, version_type='force')
         except NoResultFound as e:
             # ``warn`` is a deprecated alias of ``warning``.
             logger.warning('Record %s failed to load: %s', uuid, e)
Exemplo n.º 7
0
def record_insert_or_replace(json, skip_files=False):
    """Replace the record matching ``json``'s control number, or create it.

    An existing record is cleared, updated with ``json`` and committed.
    When the PID lookup raises ``PIDDoesNotExistError`` a new record is
    created and its recid minted instead. In both cases a truthy
    ``legacy_creation_date`` replaces the model's creation date. A record
    marked ``deleted`` whose ``new_record`` yields no recid is deleted.

    Returns the resulting record.
    """
    date_format = '%Y-%m-%d'
    schema_pid_type = get_pid_type_from_schema(json['$schema'])
    recid = json['control_number']

    try:
        existing_pid = PersistentIdentifier.get(schema_pid_type, recid)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json, skip_files=skip_files)
        if json.get('legacy_creation_date'):
            legacy_date = json['legacy_creation_date']
            record.model.created = datetime.strptime(legacy_date, date_format)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None, skip_files=skip_files)
        if json.get('legacy_creation_date'):
            legacy_date = json['legacy_creation_date']
            record.model.created = datetime.strptime(legacy_date, date_format)
        inspire_recid_minter(str(record.id), json)

    if json.get('deleted'):
        replaced_by = get_recid_from_ref(json.get('new_record'))
        if not replaced_by:
            record.delete()

    return record
Exemplo n.º 8
0
 def index_by_id(self, record_uuid):
     """Load the record with the given UUID and index it.

     Args:
         record_uuid: UUID of the record to index.

     Returns:
         Whatever ``self.index`` returns for the loaded record.
     """
     record = InspireRecord.get_record(record_uuid)
     return self.index(record)
Exemplo n.º 9
0
def store_record(obj, eng):
    """Persist the workflow payload as a new or an updated record.

    New records are created without files, their documents and figures
    are downloaded afterwards, and the control number and ``head_uuid``
    are written back to the workflow object. Updates replace the content
    of the record stored under ``extra_data['head_uuid']``. Non-author
    updates are skipped while ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.
    """
    is_update = obj.extra_data.get('is-update')
    is_authors = eng.workflow_definition.data_type == 'authors'

    if not is_update:
        # Skip the files to avoid issues in case the record has already pid
        # TODO: remove the skip files once labs becomes master
        record = InspireRecord.create(obj.data, id_=None, skip_files=True)
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = record['control_number']
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)
    elif is_authors or current_app.config.get('FEATURE_FLAG_ENABLE_MERGER', False):
        record = InspireRecord.get_record(obj.extra_data['head_uuid'])
        obj.data['control_number'] = record['control_number']
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        obj.log.info(
            'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
        )
        return

    record.commit()
    obj.save()
    db.session.commit()
Exemplo n.º 10
0
def sample_record(app):
    """Yield an indexed sample Literature record (control number 111).

    Teardown deletes the ``lit`` PID ``'111'``, force-deletes the record,
    bulk-deletes the versioning manager's ``transaction_cls`` query and
    commits the session.
    """
    payload = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'control_number': 111,
        'document_type': ['article'],
        'titles': [{'title': 'sample'}],
    }
    indexed = _create_and_index_record(payload)
    indexed_uuid = indexed.id

    yield indexed

    db.session.delete(PersistentIdentifier.get('lit', '111'))
    InspireRecord.get_record(indexed_uuid)._delete(force=True)
    invenio_db = current_app.extensions['invenio-db']
    invenio_db.versioning_manager.transaction_cls.query.delete()
    db.session.commit()
Exemplo n.º 11
0
def store_record(obj, eng):
    """Persist the workflow payload as a new or an updated record.

    When ``FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT`` is set the work is
    delegated to ``store_record_inspirehep_api``. Otherwise the record is
    created or updated inside a nested database transaction. Non-author
    updates are skipped while ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.
    """
    is_update = obj.extra_data.get('is-update')
    is_authors = eng.workflow_definition.data_type == 'authors'

    if current_app.config.get("FEATURE_FLAG_ENABLE_REST_RECORD_MANAGEMENT"):
        store_record_inspirehep_api(obj, eng, is_update, is_authors)
        return

    with db.session.begin_nested():
        if is_update:
            merger_disabled = not is_authors and not current_app.config.get(
                'FEATURE_FLAG_ENABLE_MERGER', False)
            if merger_disabled:
                obj.log.info(
                    'skipping update record, feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is disabled.'
                )
                return

            head_uuid = obj.extra_data['head_uuid']
            record = InspireRecord.get_record(head_uuid)
            obj.data['control_number'] = record['control_number']
            record.clear()
            record.update(obj.data, files_src_records=[obj])
        else:
            # Skip the files to avoid issues in case the record has already pid
            # TODO: remove the skip files once labs becomes master
            record = InspireRecord.create(obj.data, id_=None, skip_files=True)
            # Now that we have a recid, we can properly download the documents
            record.download_documents_and_figures(src_records=[obj])

            obj.data['control_number'] = record['control_number']
            # store head_uuid to store the root later
            obj.extra_data['head_uuid'] = str(record.id)

        record.commit()
        obj.save()
Exemplo n.º 12
0
def sample_record(app):
    """Create, index and yield a sample article; remove it afterwards.

    The record carries control number 111. On teardown its ``lit`` PID
    and the record are deleted, the versioning manager's
    ``transaction_cls`` query is bulk-deleted and the session committed.
    """
    record = _create_and_index_record({
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': [
            'Literature',
        ],
        'control_number': 111,
        'document_type': [
            'article',
        ],
        'titles': [
            {'title': 'sample'},
        ],
    })
    record_uuid = record.id

    yield record

    lit_pid = PersistentIdentifier.get('lit', '111')
    db.session.delete(lit_pid)

    stored = InspireRecord.get_record(record_uuid)
    stored._delete(force=True)

    versioning_manager = current_app.extensions['invenio-db'].versioning_manager
    versioning_manager.transaction_cls.query.delete()
    db.session.commit()
Exemplo n.º 13
0
def test_appoint_profile_from_claimed_signature(small_app):
    """Check that a claimed signature pulls its whole cluster to its profile.

    Two literature records get the same phonetic block; the signature of
    the second one is curated and points to profile ``314159265``. After
    clustering, the first author of both records must carry that profile.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    # Unclaimed signature: add the phonetic block and a different profile.
    old_record_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_signature = old_record['authors'][0]
    old_author_uuid = old_signature['uuid']
    old_signature['signature_block'] = "HAGp"
    old_signature['recid'] = "2"
    es.index(index='records-hep', doc_type='hep', id=old_record_id,
             body=old_record)
    es.indices.refresh('records-hep')

    # Claimed signature: same phonetic block, curated profile.
    record_id = str(PersistentIdentifier.get('lit', 1358492).object_uuid)
    record = get_es_record_by_uuid(record_id)
    signature = record['authors'][0]
    author_uuid = signature['uuid']
    signature['signature_block'] = "HAGp"
    signature['recid'] = "314159265"
    signature['curated_relation'] = True
    es.index(index='records-hep', doc_type='hep', id=record_id, body=record)
    es.indices.refresh('records-hep')

    clusters = _BeardObject(({"2": [old_author_uuid, author_uuid]}, {}))
    delay_path = \
        "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay"
    with patch("celery.current_app.send_task", return_value=clusters), \
            patch(delay_path, side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    for publication_id in (old_record_id, record_id):
        stored = InspireRecord.get_record(publication_id)
        assert stored['authors'][0]['recid'] == "314159265"
Exemplo n.º 14
0
 def actions():
     """Yield one forced-version index operation per indexable record.

     Iterates over ``uuids`` from the enclosing scope. Records flagged
     ``deleted`` are skipped with a debug message. A UUID whose record
     lookup raises ``NoResultFound`` is logged and skipped.
     """
     for uuid in uuids:
         try:
             record = InspireRecord.get_record(uuid)
             if record.get('deleted', False):
                 logger.debug("Record already %s deleted, not indexing!", uuid)
                 continue
             yield create_index_op(record, version_type='force')
         except NoResultFound as e:
             # ``warn`` is a deprecated alias of ``warning``.
             logger.warning('Record %s failed to load: %s', uuid, e)
Exemplo n.º 15
0
def test_appoint_profile_from_claimed_signature(small_app):
    """Verify that the claimed signature decides the profile of a cluster.

    Both records are re-indexed with the phonetic block ``HAGp``; only the
    second one is curated. Clustering must assign the curated profile to
    the first author of both records.
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid
    )

    def _reindex(doc_id, body):
        # Push the modified document to Elasticsearch and make it visible.
        es.index(index='records-hep', doc_type='hep', id=doc_id, body=body)
        es.indices.refresh('records-hep')

    old_pid = PersistentIdentifier.get('lit', 11883)
    old_record_id = str(old_pid.object_uuid)
    old_record = get_es_record_by_uuid(old_record_id)
    old_author_uuid = old_record['authors'][0]['uuid']

    # Add phonetic block to the record.
    old_record['authors'][0]['signature_block'] = "HAGp"
    old_record['authors'][0]['recid'] = "2"
    _reindex(old_record_id, old_record)

    new_pid = PersistentIdentifier.get('lit', 1358492)
    record_id = str(new_pid.object_uuid)
    record = get_es_record_by_uuid(record_id)
    author_uuid = record['authors'][0]['uuid']

    # Add phonetic block to the record, this time as a curated signature.
    record['authors'][0]['signature_block'] = "HAGp"
    record['authors'][0]['recid'] = "314159265"
    record['authors'][0]['curated_relation'] = True
    _reindex(record_id, record)

    beard_result = _BeardObject(
        ({"2": [old_author_uuid, author_uuid]}, {}))
    with patch("celery.current_app.send_task", return_value=beard_result):
        with patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
            side_effect=update_authors_recid
        ):
            disambiguation_clustering("HAGp")

    old_stored = InspireRecord.get_record(old_record_id)
    assert old_stored['authors'][0]['recid'] == "314159265"
    new_stored = InspireRecord.get_record(record_id)
    assert new_stored['authors'][0]['recid'] == "314159265"
Exemplo n.º 16
0
def test_append_updated_record_to_queue_same_data(small_app):
    """Check that an unchanged record is not queued by the receiver."""
    publication_id = str(PersistentIdentifier.get('lit', 11883).object_uuid)
    record = InspireRecord.get_record(publication_id)

    # Old and new version are the same object, so nothing changed.
    append_updated_record_to_queue(None, record, record, 'records-hep', 'hep')

    latest = DisambiguationRecord.query.order_by(desc('id')).first()
    assert str(record.id) != latest.record_id
Exemplo n.º 17
0
def test_append_updated_record_to_queue_same_data(small_app):
    """Verify the receiver skips a publication whose data did not change."""
    pid = PersistentIdentifier.get('lit', 11883)
    record = InspireRecord.get_record(str(pid.object_uuid))

    append_updated_record_to_queue(
        None, record, record, "records-hep", "hep"
    )

    newest_entry = DisambiguationRecord.query.order_by(desc("id")).first()
    queued_id = newest_entry.record_id
    assert str(record.id) != queued_id
Exemplo n.º 18
0
    def _get_updated_record(obj):
        """Return the record that the workflow object updates.

        Uses ``extra_data['head_uuid']`` when present, otherwise falls back
        to the approved match resolved through the schema's PID type.

        TODO: use only head_uuid once we have the merger.
        """
        if 'head_uuid' not in obj.extra_data:
            pid_type = get_pid_type_from_schema(obj.data['$schema'])
            approved_recid = obj.extra_data['matches']['approved']
            return get_db_record(pid_type, approved_recid)

        return InspireRecord.get_record(obj.extra_data['head_uuid'])
Exemplo n.º 19
0
 def actions():
     """Yield one forced-version index operation per indexable record.

     Iterates over ``uuids`` from the enclosing scope. Records flagged
     ``deleted`` are skipped with a debug message. A UUID whose record
     lookup raises ``NoResultFound`` is logged and skipped.
     """
     for uuid in uuids:
         try:
             record = InspireRecord.get_record(uuid)
             if record.get('deleted', False):
                 logger.debug("Record already %s deleted, not indexing!",
                              uuid)
                 continue
             yield create_index_op(record, version_type='force')
         except NoResultFound as e:
             # ``warn`` is a deprecated alias of ``warning``.
             logger.warning('Record %s failed to load: %s', uuid, e)
Exemplo n.º 20
0
    def _get_updated_record(obj):
        """Look up the record that this workflow object is updating.

        Prefers ``extra_data['head_uuid']``. Without it, the first entry of
        ``extra_data['record_matches']`` is resolved through the PID type
        derived from the payload's schema.

        TODO: use only head_uuid once we have the merger.
        """
        extra_data = obj.extra_data
        if 'head_uuid' in extra_data:
            return InspireRecord.get_record(extra_data['head_uuid'])

        pid_type = get_pid_type_from_schema(obj.data['$schema'])
        first_match = extra_data['record_matches'][0]
        return get_db_record(pid_type, first_match)
Exemplo n.º 21
0
def record_to_merge(workflow_app):
    """Yield a committed thesis record and dispose of it afterwards.

    The record is created without files, committed and the ``records-hep``
    index refreshed before it is handed out. Teardown unassigns and deletes
    its ``lit`` PID, then deletes the record and commits it.
    """
    metadata = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': [
            {'full_name': 'Jessica, Jones'},
        ],
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': [
            {'source': 'arXiv', 'value': '100 pages, 36 figures'},
        ],
        'titles': [
            {'title': 'Alias Investigations'},
        ],
        'dois': [
            {'value': '10.1007/978-3-319-15001-7'},
        ],
    }
    record = InspireRecord.create(metadata, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Reload the record so teardown works on the stored state.
    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit', pid_value=stored['control_number'])

    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
Exemplo n.º 22
0
def record_to_merge(workflow_app):
    """Provide a stored thesis record for merge tests, removing it at the end.

    Creation is followed by a session commit and a flush-and-refresh of
    ``records-hep``. After the ``yield`` the record is reloaded, its ``lit``
    PID is unassigned and deleted, and the record is deleted and committed.
    """
    authors = [{'full_name': 'Jessica, Jones'}]
    public_notes = [{'source': 'arXiv', 'value': '100 pages, 36 figures'}]
    titles = [{'title': 'Alias Investigations'}]
    dois = [{'value': '10.1007/978-3-319-15001-7'}]

    data = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'authors': authors,
        'document_type': ['thesis'],
        'number_of_pages': 100,
        'preprint_date': '2016-11-16',
        'public_notes': public_notes,
        'titles': titles,
        'dois': dois,
    }
    record = InspireRecord.create(data, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    current_search.flush_and_refresh('records-hep')

    yield record

    record = InspireRecord.get_record(rec_uuid)
    control_number = record['control_number']
    pid = PersistentIdentifier.get(pid_type='lit', pid_value=control_number)

    pid.unassign()
    pid.delete()
    record.delete()
    record.commit()
Exemplo n.º 23
0
def update_authors_recid(record_id, uuid, profile_recid):
    """Update author profile for a given signature.

    The method receives UUIDs representing record and signature
    respectively together with an author profile recid.
    The new recid will be placed in the signature with the given
    UUID.

    A record without an ``authors`` field, or without a signature carrying
    the given UUID, is left untouched and a warning is logged instead of
    reporting a successful update.

    On ``StaleDataError`` the task is retried via
    ``update_authors_recid.retry``.

    :param record_id:
        A string representing UUID of a given record.

        Example:
            record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid:
        A string representing UUID of a given signature.

        Example:
            uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid:
        A string representing author profile recid, that
        updated signature should point to.

        Example:
            profile_recid = "1"
    """
    update_flag = False

    try:
        record = InspireRecord.get_record(record_id)

        # Tolerate records without authors instead of raising KeyError.
        for author in record.get('authors', []):
            if author['uuid'] == uuid:
                author['recid'] = str(profile_recid)
                update_flag = True

        if update_flag:
            # Disconnect the receiver while this task commits the record.
            before_record_index.disconnect(append_updated_record_to_queue)

            # Update the record in the database.
            record.commit()
            db.session.commit()
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Reconnect the disconnected signal.
        before_record_index.connect(append_updated_record_to_queue)

    # Report what actually happened.
    if update_flag:
        logger.info("Updated signature %s with profile %s",
                    uuid, profile_recid)
    else:
        logger.warning("Signature %s not found in record %s, "
                       "profile %s was not assigned",
                       uuid, record_id, profile_recid)
Exemplo n.º 24
0
def update_authors_recid(record_id, uuid, profile_recid):
    """Update author profile for a given signature.

    The method receives UUIDs representing record and signature
    respectively together with an author profile recid.
    The new recid will be placed in the signature with the given
    UUID.

    A record without an ``authors`` field, or without a signature carrying
    the given UUID, is left untouched and a warning is logged instead of
    reporting a successful update.

    On ``StaleDataError`` the task is retried via
    ``update_authors_recid.retry``.

    :param record_id:
        A string representing UUID of a given record.

        Example:
            record_id = "a5afb151-8f75-4e91-8dc1-05e7e8e8c0b8"

    :param uuid:
        A string representing UUID of a given signature.

        Example:
            uuid = "c2f432bd-2f52-4c16-ac66-096f168c762f"

    :param profile_recid:
        A string representing author profile recid, that
        updated signature should point to.

        Example:
            profile_recid = "1"
    """
    update_flag = False

    try:
        record = InspireRecord.get_record(record_id)

        # Tolerate records without authors instead of raising KeyError.
        for author in record.get('authors', []):
            if author['uuid'] == uuid:
                author['recid'] = str(profile_recid)
                update_flag = True

        if update_flag:
            # Disconnect the receiver while this task commits the record.
            before_record_index.disconnect(append_updated_record_to_queue)

            # Update the record in the database.
            record.commit()
            db.session.commit()
    except StaleDataError as exc:
        raise update_authors_recid.retry(exc=exc)
    finally:
        # Reconnect the disconnected signal.
        before_record_index.connect(append_updated_record_to_queue)

    # Report what actually happened.
    if update_flag:
        logger.info("Updated signature %s with profile %s", uuid, profile_recid)
    else:
        logger.warning(
            "Signature %s not found in record %s, profile %s was not assigned",
            uuid, record_id, profile_recid
        )
Exemplo n.º 25
0
def test_append_updated_record_to_queue(small_app):
    """Check that a changed HEP record is queued by the receiver."""
    publication_id = str(PersistentIdentifier.get('lit', 4328).object_uuid)
    original = InspireRecord.get_record(publication_id)

    # Work on a copy whose first author was renamed.
    record_to_update = deepcopy(original)
    first_author = record_to_update['authors'][0]
    first_author['full_name'] = 'John Smith'

    append_updated_record_to_queue(
        None, record_to_update, record_to_update, 'records-hep', 'hep')

    latest = DisambiguationRecord.query.order_by(desc('id')).first()
    assert str(record_to_update.id) == latest.record_id
Exemplo n.º 26
0
def test_append_updated_record_to_queue(small_app):
    """Verify the receiver queues a HEP record whose author data changed."""
    pid = PersistentIdentifier.get('lit', 4328)
    record = InspireRecord.get_record(str(pid.object_uuid))

    record_to_update = deepcopy(record)
    record_to_update['authors'][0]['full_name'] = "John Smith"

    receiver_args = (
        None, record_to_update, record_to_update, "records-hep", "hep"
    )
    append_updated_record_to_queue(*receiver_args)

    newest_entry = DisambiguationRecord.query.order_by(desc("id")).first()
    queued_id = newest_entry.record_id
    assert str(record_to_update.id) == queued_id
Exemplo n.º 27
0
def merge_articles(obj, eng):
    """Merge the workflow payload into the approved matching record.

    The merged record replaces the workflow payload. The head record's
    UUID, version id, revision id and the original root are recorded in
    ``extra_data``. When the merge reports conflicts they are stored in
    ``extra_data.conflicts`` together with some metadata and a
    ``callback_url`` for the endpoint that resolves merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the
        merge is skipped and ``None`` is returned.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', approved_recid)
    head_uuid = head_pid.object_uuid

    head_record = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = LiteratureReader(obj.data).source
    stored_root = read_wf_record_source(
        record_uuid=head_record.id,
        source=update_source.lower(),
    )
    # Without a stored root for this source, merge against an empty root.
    head_root = stored_root.json if stored_root else {}

    obj.extra_data['head_uuid'] = str(head_uuid)
    obj.extra_data['head_version_id'] = head_record.model.version_id
    obj.extra_data['merger_head_revision'] = head_record.revision_id
    obj.extra_data['merger_original_root'] = deepcopy(head_root)

    merged, conflicts = merge(
        head=head_record.to_dict(), root=head_root, update=update)
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        # NOTE(review): the format combines the 24-hour ``%H`` with ``%p``;
        # kept as is because the stored string may be consumed elsewhere.
        detected_at = datetime.now().strftime("%b %d, %Y, %H:%M:%S %p")
        obj.extra_data['conflicts_metadata'] = {
            'datetime': detected_at,
            'update_source': update_source,
        }
        callback_url = get_resolve_merge_conflicts_callback_url()
        obj.extra_data['callback_url'] = callback_url
    obj.save()
Exemplo n.º 28
0
def record_from_db(workflow_app):
    """Yield a committed article record (control number 1234), then remove it.

    The record is created without files and committed, and ``records-hep``
    is refreshed before it is handed out. Teardown unassigns and deletes its
    ``lit`` PID and deletes the record.
    """
    metadata = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [{'title': 'Fancy title for a new record'}],
        'arxiv_eprints': [
            {'categories': ['hep-th'], 'value': '1407.7587'},
        ],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [
            {'source': 'arxiv', 'value': 'A basic abstract.'},
        ],
        'report_numbers': [{'value': 'DESY-17-036'}],
    }
    record = InspireRecord.create(metadata, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Reload the record so teardown works on the stored state.
    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit', pid_value=stored['control_number'])

    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
Exemplo n.º 29
0
def test_create_author_method(small_app):
    """Check that ``create_author`` builds an author profile from a signature."""
    signature = {
        'affiliations': [
            {'value': 'Copenhagen U.'},
        ],
        'curated_relation': False,
        'full_name': 'Glashow, S.L.',
        'uuid': '6a3d43be-e962-4c20-8908-a81bd39447b5',
    }

    recid = create_author(signature)
    author_pid = PersistentIdentifier.get('aut', recid)
    profile = InspireRecord.get_record(author_pid.object_uuid)

    expected_collections = ['Authors']
    expected_name = {'value': 'Glashow, S.L.'}
    expected_positions = [{'institution': {'name': 'Copenhagen U.'}}]

    assert profile['_collections'] == expected_collections
    assert profile['name'] == expected_name
    assert profile['positions'] == expected_positions
Exemplo n.º 30
0
def _is_stale_data(workflow_object):
    is_update = workflow_object.extra_data.get('is-update')
    head_version_id = workflow_object.extra_data.get('head_version_id')

    if not is_update or head_version_id is None:
        return False

    head_uuid = workflow_object.extra_data.get('head_uuid')
    record = InspireRecord.get_record(head_uuid)

    if record.model.version_id != head_version_id:
        workflow_object.log.info(
            'Working with stale data:', 'Expecting version %d but found %d' %
            (head_version_id, record.revision_id))
        return True
    return False
Exemplo n.º 31
0
def _is_stale_data(workflow_object):
    is_update = workflow_object.extra_data.get('is-update')
    head_version_id = workflow_object.extra_data.get('head_version_id')

    if not is_update or head_version_id is None:
        return False

    head_uuid = workflow_object.extra_data.get('head_uuid')
    record = InspireRecord.get_record(head_uuid)

    if record.model.version_id != head_version_id:
        workflow_object.log.info(
            'Working with stale data:',
            'Expecting version %d but found %d' % (
                head_version_id, record.revision_id
            )
        )
        return True
    return False
Exemplo n.º 32
0
def merge_articles(obj, eng):
    """Merge the workflow payload into the approved matching record.

    The merged record replaces the workflow payload and the head record's
    UUID is stored in ``extra_data['head_uuid']``. When the merge reports
    conflicts they are stored in ``extra_data.conflicts`` together with a
    ``callback_url`` for the endpoint that resolves merge conflicts.

    Note:
        When the feature flag ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the
        merge is skipped and ``None`` is returned.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', approved_recid)
    head_uuid = head_pid.object_uuid

    obj.extra_data['head_uuid'] = str(head_uuid)

    head = InspireRecord.get_record(head_uuid)
    update = obj.data
    update_source = get_source(update).lower()
    stored_root = read_wf_record_source(
        record_uuid=head.id, source=update_source)
    # Without a stored root for this source, merge against an empty root.
    head_root = stored_root.json if stored_root else {}

    merged, conflicts = merge(head=head.dumps(), root=head_root, update=update)
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        callback_url = get_resolve_merge_conflicts_callback_url()
        obj.extra_data['callback_url'] = callback_url
    obj.save()
Exemplo n.º 33
0
    def _delete_action(self, payload):
        """
        Bulk delete action.
        Args:
            payload: Decoded message body.

        Returns:
            Dictionary defining an Elasticsearch bulk 'delete' action.

        """
        index, doc_type = payload.get('index'), payload.get('doc_type')
        if not (index and doc_type):
            record = InspireRecord.get_record(payload['id'])
            index, doc_type = self.record_to_index(record)

        return {
            '_op_type': 'delete',
            '_index': index,
            '_type': doc_type,
            '_id': payload['id'],
        }
Exemplo n.º 34
0
def modify_record(pid_type, pid_value):
    """
    Modify the metadata of a single record identified by its PID.

    Written as a generator to be used as a context manager (the decorator
    is not part of this block). It yields a ``dict`` holding all metadata
    of the record. Once the ``with`` block finishes, the record's content
    is replaced by that ``dict`` and committed.

    Example:
        >>> with modify_record('lit', 1505221) as data:
        ...     data['titles'][0] = {'title': 'My new title'}
    """
    pid_query = PersistentIdentifier.query.filter_by(
        pid_type=pid_type,
        pid_value=str(pid_value),
    )
    record = InspireRecord.get_record(pid_query.one().object_uuid)
    data = record.to_dict()

    yield data

    # Replace the stored content wholesale with the edited dict.
    record.clear()
    record.update(data)
    record.commit()
    db.session.commit()
Exemplo n.º 35
0
def store_record(obj, eng):
    """Persist the workflow payload as a new or an updated record.

    Updates replace the content of the record stored under
    ``extra_data['head_uuid']``. New records are created, get a recid
    minted, have their documents and figures downloaded, and the control
    number and ``head_uuid`` are written back to the workflow object.
    """
    if obj.extra_data.get('is-update'):
        head_uuid = obj.extra_data['head_uuid']
        record = InspireRecord.get_record(head_uuid)
        record.clear()
        record.update(obj.data, files_src_records=[obj])
    else:
        record = InspireRecord.create(obj.data, id_=None)
        # Create persistent identifier.
        minted = inspire_recid_minter(str(record.id), record)
        created_pid = minted.pid_value
        # Now that we have a recid, we can properly download the documents
        record.download_documents_and_figures(src_records=[obj])

        obj.data['control_number'] = created_pid
        # store head_uuid to store the root later
        obj.extra_data['head_uuid'] = str(record.id)

    record.commit()
    obj.save()
    db.session.commit()
Exemplo n.º 36
0
    def _index_action(self, payload):
        """
        Build the bulk 'index' action for a message.

        The record is loaded from ``payload['id']``; its index and document
        type come from ``self.record_to_index``.

        Args:
            payload: Decoded message body.

        Returns:
            Dictionary defining an Elasticsearch bulk 'index' action.

        """
        record = InspireRecord.get_record(payload['id'])
        index, doc_type = self.record_to_index(record)

        action = {
            '_op_type': 'index',
            '_index': index,
            '_type': doc_type,
        }
        action['_id'] = str(record.id)
        action['_version'] = record.revision_id
        action['_version_type'] = self._version_type
        action['_source'] = self._prepare_record(record, index, doc_type)
        return action
Exemplo n.º 37
0
def record_insert_or_replace(json):
    """Create the record described by ``json`` or overwrite the existing one.

    The control number is read from ``json['control_number']``, or from
    ``json['recid']`` when that key is absent. Without a control number
    nothing is done and ``None`` is returned.

    If a PID with that control number exists, the record behind it is
    cleared, refilled with ``json`` and committed; otherwise a new record is
    created and a persistent identifier is minted for it. Afterwards, a record
    flagged ``deleted`` whose ``new_record`` reference yields no recid is
    deleted.

    Returns:
        The inserted or replaced record, or ``None`` when ``json`` carries no
        control number.
    """
    control_number = json.get('control_number', json.get('recid'))
    if not control_number:
        return None

    pid_type = get_pid_type_from_schema(json['$schema'])
    try:
        existing_pid = PersistentIdentifier.get(pid_type, control_number)
        record = InspireRecord.get_record(existing_pid.object_uuid)
        record.clear()
        record.update(json)
        record.commit()
    except PIDDoesNotExistError:
        record = InspireRecord.create(json, id_=None)
        # Create persistent identifier.
        inspire_recid_minter(str(record.id), json)

    # The ``new_record`` reference is only inspected for deleted records.
    if json.get('deleted') and not get_recid_from_ref(json.get('new_record')):
        record.delete()

    return record
Exemplo n.º 38
0
def record_from_db(workflow_app):
    """Yield a literature record stored in the DB and clean it up afterwards.

    Before yielding, the record is created (files skipped) and committed, the
    DB session is committed and the ``records-hep`` index is refreshed. After
    the yield, the record is loaded again, its ``lit`` PID is unassigned and
    deleted, and the record is deleted and committed.

    ``workflow_app`` is not referenced in the body.
    """
    record_json = {
        '$schema': 'http://localhost:5000/schemas/records/hep.json',
        '_collections': ['Literature'],
        'document_type': ['article'],
        'titles': [
            {'title': 'Fancy title for a new record'},
        ],
        'arxiv_eprints': [
            {'categories': ['hep-th'], 'value': '1407.7587'},
        ],
        'control_number': 1234,
        'authors': [
            {'full_name': 'Maldacena, J.'},
            {'full_name': 'Strominger, A.'},
        ],
        'abstracts': [
            {'source': 'arxiv', 'value': 'A basic abstract.'},
        ],
        'report_numbers': [
            {'value': 'DESY-17-036'},
        ],
    }
    record = InspireRecord.create(record_json, id_=None, skip_files=True)
    record.commit()
    rec_uuid = record.id

    db.session.commit()
    es.indices.refresh('records-hep')

    yield record

    # Teardown: work on a freshly loaded copy of the record.
    stored = InspireRecord.get_record(rec_uuid)
    lit_pid = PersistentIdentifier.get(
        pid_type='lit',
        pid_value=stored['control_number'],
    )

    lit_pid.unassign()
    lit_pid.delete()
    stored.delete()
    stored.commit()
Exemplo n.º 39
0
def modify_record(pid_type, pid_value):
    """
    Context manager to edit the metadata of the record behind a PID.

    A ``dict`` with the record's metadata is yielded to the ``with`` block.
    Once the block finishes, the record is cleared, refilled from that
    ``dict`` and committed together with the database session.

    Args:
        pid_type: Type of the PID to look up.
        pid_value: Value of the PID; it is converted to ``str`` for the
            lookup.

    Example:
        >>> with modify_record('lit', 1505221) as data:
        ...     data['titles'][0] = {'title': 'My new title'}
    """
    pid_query = PersistentIdentifier.query.filter_by(
        pid_type=pid_type,
        pid_value=str(pid_value),
    )
    # ``one()`` raises unless exactly one PID matches.
    record = InspireRecord.get_record(pid_query.one().object_uuid)
    data = record.to_dict()

    yield data

    record.clear()
    record.update(data)
    record.commit()
    db.session.commit()
Exemplo n.º 40
0
def merge_articles(obj, eng):
    """Merge the workflow payload with the approved matching article.

    The merged record replaces the workflow payload. If there are conflicts
    they are stored in ``extra_data.conflicts``, together with a
    ``callback_url`` pointing to the endpoint which resolves them. The uuid
    of the matched record is stored in ``extra_data.head_uuid``.

    Note:
        For the time being the ``root`` is ignored (an empty ``dict`` is
        passed to the merger), so this is a rootless implementation relying
        only on the ``head``. When the feature flag
        ``FEATURE_FLAG_ENABLE_MERGER`` is falsy the merge is skipped and
        ``None`` is returned.

    """
    merger_enabled = current_app.config.get('FEATURE_FLAG_ENABLE_MERGER')
    if not merger_enabled:
        return None

    approved_recid = obj.extra_data['matches']['approved']
    head_pid = PersistentIdentifier.get('lit', approved_recid)
    head_uuid = head_pid.object_uuid
    obj.extra_data['head_uuid'] = str(head_uuid)

    head_record = InspireRecord.get_record(head_uuid)
    payload = obj.data

    merged, conflicts = merge(
        head=head_record.dumps(),
        root={},
        update=payload,
    )
    obj.data = merged

    if conflicts:
        obj.extra_data['conflicts'] = conflicts
        obj.extra_data['callback_url'] = (
            get_resolve_merge_conflicts_callback_url()
        )
    obj.save()
Exemplo n.º 41
0
def test_single_signature_with_no_profile(small_app):
    """Cluster a single, new signature and expect it to get recid "1"."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    pid = PersistentIdentifier.get('lit', 11883)
    record_id = str(pid.object_uuid)
    es_record = get_es_record_by_uuid(record_id)
    author_uuid = es_record['authors'][0]['uuid']

    # Add phonetic block to the record and reindex it.
    es_record['authors'][0]['signature_block'] = "HAGp"
    es.index(
        index='records-hep',
        doc_type='hep',
        id=record_id,
        body=es_record,
    )
    es.indices.refresh('records-hep')

    clusters = _BeardObject(({}, {"0": [author_uuid]}))
    with patch("celery.current_app.send_task", return_value=clusters), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    stored = InspireRecord.get_record(record_id)
    assert stored['authors'][0]['recid'] == "1"
Exemplo n.º 42
0
def test_single_signature_with_no_profile(small_app):
    """Run the clustering on one new signature; it must end up with recid "1"."""
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    lit_pid = PersistentIdentifier.get('lit', 11883)
    record_id = str(lit_pid.object_uuid)
    document = get_es_record_by_uuid(record_id)
    author_uuid = document['authors'][0]['uuid']

    # Add phonetic block to the record.
    document['authors'][0]['signature_block'] = "HAGp"
    es.index(index='records-hep', doc_type='hep', id=record_id, body=document)
    es.indices.refresh('records-hep')

    beard_result = _BeardObject(({}, {"0": [author_uuid]}))
    send_task = patch("celery.current_app.send_task",
                      return_value=beard_result)
    with send_task, patch(
            "inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
            side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    first_author = InspireRecord.get_record(record_id)['authors'][0]
    assert first_author['recid'] == "1"
Exemplo n.º 43
0
def test_solve_claim_conflicts(small_app):
    """Check the case where two claimed signatures share a cluster.

    The conflict resolution is patched to group the unclaimed signature with
    the second claimed one, so the unclaimed signature must end up with that
    signature's recid ("4").
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering,
        update_authors_recid,
    )

    def load(recid):
        # Return the record uuid (as str) and its ES document.
        record_id = str(PersistentIdentifier.get('lit', recid).object_uuid)
        return record_id, get_es_record_by_uuid(record_id)

    def reindex(record_id, document):
        es.index(index='records-hep',
                 doc_type='hep',
                 id=record_id,
                 body=document)
        es.indices.refresh('records-hep')

    # Claimed signature #1.
    glashow_id, glashow = load(4328)
    glashow_uuid = glashow['authors'][0]['uuid']

    # Add phonetic block to the record.
    glashow['authors'][0]['signature_block'] = "HAGp"
    glashow['authors'][0]['curated_relation'] = True
    glashow['authors'][0]['recid'] = "3"
    reindex(glashow_id, glashow)

    # Claimed signature #2.
    higgs_claimed_id, higgs_claimed = load(1358492)
    higgs_claimed_uuid = higgs_claimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_claimed['authors'][0]['signature_block'] = "HAGp"
    higgs_claimed['authors'][0]['curated_relation'] = True
    higgs_claimed['authors'][0]['recid'] = "4"
    reindex(higgs_claimed_id, higgs_claimed)

    # Not claimed signature.
    higgs_unclaimed_id, higgs_unclaimed = load(11883)
    higgs_unclaimed_uuid = higgs_unclaimed['authors'][0]['uuid']

    # Add phonetic block to the record.
    higgs_unclaimed['authors'][0]['signature_block'] = "HAGp"
    reindex(higgs_unclaimed_id, higgs_unclaimed)

    clusters = _BeardObject((
        {"3": [glashow_uuid, higgs_claimed_uuid, higgs_unclaimed_uuid]},
        {},
    ))
    with patch("celery.current_app.send_task", return_value=clusters), \
            patch("inspirehep.modules.disambiguation.logic._solve_claims_conflict",
                  return_value=_ConflictObject(
                      {higgs_claimed_uuid: [higgs_unclaimed_uuid]})), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    stored = InspireRecord.get_record(higgs_unclaimed_id)
    assert stored['authors'][0]['recid'] == "4"
Exemplo n.º 44
0
def get_db_record(pid_type, recid):
    """Return the ``InspireRecord`` that the given PID points to.

    Args:
        pid_type: PID type handed to ``PersistentIdentifier.get``.
        recid: PID value handed to ``PersistentIdentifier.get``.
    """
    from inspirehep.modules.records.api import InspireRecord

    record_uuid = PersistentIdentifier.get(pid_type, recid).object_uuid
    return InspireRecord.get_record(record_uuid)
Exemplo n.º 45
0
def test_solve_claim_conflicts(small_app):
    """Check the module when at least two claimed signatures are assigned
    to the same cluster.

    Three signatures are prepared in block "HAGp": two claimed ones (recids
    "3" and "4") and an unclaimed one. With the conflict resolution patched
    to pair the unclaimed signature with the one claimed as "4", the
    unclaimed signature is expected to receive recid "4".
    """
    from inspirehep.modules.disambiguation.tasks import (
        disambiguation_clustering, update_authors_recid)

    def prepare_signature(recid, claimed_as=None):
        # Put the first author of the record into block "HAGp" (optionally
        # as a claimed signature), reindex, and return the record uuid and
        # the author's uuid.
        record_id = str(PersistentIdentifier.get('lit', recid).object_uuid)
        document = get_es_record_by_uuid(record_id)
        author_uuid = document['authors'][0]['uuid']

        # Add phonetic block to the record.
        document['authors'][0]['signature_block'] = "HAGp"
        if claimed_as is not None:
            document['authors'][0]['curated_relation'] = True
            document['authors'][0]['recid'] = claimed_as
        es.index(index='records-hep', doc_type='hep',
                 id=record_id, body=document)
        es.indices.refresh('records-hep')
        return record_id, author_uuid

    # Claimed signature #1.
    _, glashow_uuid = prepare_signature(4328, claimed_as="3")

    # Claimed signature #2.
    _, higgs_claimed_uuid = prepare_signature(1358492, claimed_as="4")

    # Not claimed signature.
    higgs_unclaimed_id, higgs_unclaimed_uuid = prepare_signature(11883)

    cluster = [glashow_uuid, higgs_claimed_uuid, higgs_unclaimed_uuid]
    beard_result = _BeardObject(({"3": cluster}, {}))

    with patch("celery.current_app.send_task",
               return_value=beard_result), \
            patch("inspirehep.modules.disambiguation.logic._solve_claims_conflict",
                  return_value=_ConflictObject(
                      {higgs_claimed_uuid: [higgs_unclaimed_uuid]})), \
            patch("inspirehep.modules.disambiguation.tasks.update_authors_recid.delay",
                  side_effect=update_authors_recid):
        disambiguation_clustering("HAGp")

    first_author = InspireRecord.get_record(higgs_unclaimed_id)['authors'][0]
    assert first_author['recid'] == "4"
Exemplo n.º 46
0
 def delete_by_id(self, record_uuid):
     """Load the record with the given identifier and pass it to ``self.delete``."""
     record = InspireRecord.get_record(record_uuid)
     self.delete(record)
Exemplo n.º 47
0
def get_db_record(pid_type, recid):
    """Fetch the database record identified by ``pid_type`` and ``recid``.

    The PID is resolved with ``PersistentIdentifier.get`` and the record is
    loaded from the uuid of the object it points to.
    """
    from inspirehep.modules.records.api import InspireRecord

    object_uuid = PersistentIdentifier.get(pid_type, recid).object_uuid
    return InspireRecord.get_record(object_uuid)