Exemplo n.º 1
0
def test_search_query_with_spires_syntax_in_literature_search():
    """Check the ES query that ``query_from_iq`` builds for an author query.

    The resulting search must contain exactly one ``must`` clause, nested
    on the ``authors`` path, whose first filter equals the filter found in
    the output of ``inspire_query_parser.parse_query`` for the same query.
    """
    query = u"a First Middle Last"
    parsed_query = inspire_query_parser.parse_query(query)
    expected_filter = parsed_query['nested']['query']['bool']['filter']

    search_dict = LiteratureSearch().query_from_iq(query).to_dict()
    must_clauses = search_dict['query']['bool']['must']

    assert len(must_clauses) == 1
    nested_clause = must_clauses[0]['nested']
    assert nested_clause['path'] == 'authors'
    assert nested_clause['query']['bool']['filter'][0] == expected_filter
Exemplo n.º 2
0
def test_inspire_search_filter(app, app_client, user_info, total_count, es_filter):
    """Check the filter and the hit count of a default ``LiteratureSearch``.

    When ``user_info`` is truthy the user is logged in through the local
    login view first; ``es_filter`` and ``total_count`` are the values
    expected for that user.
    """
    if user_info:
        login_user_via_view(
            app_client,
            email=user_info['email'],
            password=user_info['password'],
            login_url='/login/?local=1',
        )

    # Issuing a client request sets up a request context, which is what
    # lets the assertions below see the logged in user.
    app_client.get('/search')

    actual_filter = LiteratureSearch().to_dict()['query']['bool']['filter']
    assert actual_filter == es_filter
    assert LiteratureSearch().count() == total_count
Exemplo n.º 3
0
def get_literature_recids_for_orcid(orcid):
    """Return the recids of the Literature records claimed by an ORCiD.

    A claim of Literature record Y by Author record X is stored in Y as an
    author object that has a ``$ref`` pointing to X and ``curated_relation``
    set to ``True``. The lookup therefore happens in two stages: the DB is
    queried for the Author record holding the given ORCiD, and the recid of
    that record is then used to search ES for Literature records having a
    curated author with that recid.

    Both DB lookups use ``.one()``, so the call fails unless exactly one
    row matches each of them. The ES search requests at most 9999 hits.

    Args:
        orcid (str): the ORCiD.

    Return:
        list(int): the recids of the Literature records that were claimed
        by that ORCiD.

    """
    orcid_object = '[{"schema": "ORCID", "value": "%s"}]' % orcid

    # Expressed as a containment test on (json -> ids) so that the index
    # on that expression can be used.
    author_row = db.session.query(RecordMetadata.id).filter(
        type_coerce(RecordMetadata.json, JSONB)['ids'].contains(orcid_object)
    ).one()
    author_rec_uuid = author_row.id

    pid_row = db.session.query(PersistentIdentifier.pid_value).filter(
        PersistentIdentifier.object_type == 'rec',
        PersistentIdentifier.object_uuid == author_rec_uuid,
        PersistentIdentifier.pid_type == 'aut',
    ).one()
    author_recid = pid_row.pid_value

    # Both conditions are combined into one query applied to the nested
    # ``authors`` path.
    is_curated = Q('match', authors__curated_relation=True)
    is_given_author = Q('match', authors__recid=author_recid)
    claimed_search = LiteratureSearch().query(
        'nested', path='authors', query=is_curated & is_given_author)
    claimed_search = claimed_search.params(
        _source=['control_number'], size=9999)

    return [hit['control_number'] for hit in claimed_search]
Exemplo n.º 4
0
def test_select_source_function_in_literature_search(request_mocked):
    """Check the ``_source`` chosen when the UI JSON media type is requested.

    ``request_mocked.headers.get`` is set up to return the UI record media
    type before ``select_source`` is applied to a ``LiteratureSearch``.
    """
    request_mocked.headers.get.return_value = 'application/vnd+inspire.record.ui+json'
    expected_includes = ['$schema', 'control_number', '_ui_display']

    selected_source = select_source(LiteratureSearch()).to_dict()['_source']

    assert selected_source == {'includes': expected_includes}
Exemplo n.º 5
0
def _process_record(pid, app):
    """Check that the citation counts known for one ``lit`` record agree.

    For the record identified by ``pid.pid_value`` three values are
    compared: the ``total`` of ``get_citations_from_es(rec)``, the
    ``citation_count`` field returned by
    ``LiteratureSearch().get_record(rec.id)`` and
    ``rec.get_citations_count()``.

    Args:
        pid: object whose ``pid_value`` identifies the record to check.
        app: application object providing ``app_context()``.

    Returns:
        ``None`` when the module-level ``stop`` flag is truthy. Otherwise a
        tuple ``(success, deleted, no_cits, data)``:

        * ``success`` -- ``True`` when the record is deleted or when all
          three counts are equal.
        * ``deleted`` -- ``True`` when the record is flagged as deleted.
        * ``no_cits`` -- ``True`` when the counts agree and are all ``0``.
        * ``data`` -- ``{}`` when the counts agree, otherwise a dict with
          the pid value and the three collected counts (``None`` for any
          count that was not collected).
    """
    if stop:
        return
    with app.app_context():
        success = False
        deleted = False
        no_cits = False
        db_cits = None
        es_cits = None
        es_citation_count_field = None
        data = {}
        rec = get_db_record('lit', pid.pid_value)
        if rec.get('deleted'):
            success = True
            deleted = True
        if not deleted:
            try:
                es_cits = get_citations_from_es(rec).total
                search = LiteratureSearch().source(includes=['citation_count'])
                results = search.get_record(rec.id).execute()
                if not results.hits:
                    es_citation_count_field = None
                else:
                    es_citation_count_field = results.hits[0]['citation_count']
                db_cits = rec.get_citations_count()
            except Exception as err:
                # ``click.echo`` does not interpolate %-style arguments: its
                # extra positional parameters are the output file and the
                # newline flag. The message is therefore formatted here.
                click.echo("Cannot prepare data for %s record. %s"
                           % (pid.pid_value, err))
        if not deleted and es_cits is not None and es_cits == db_cits == es_citation_count_field:
            if es_cits == 0:
                no_cits = True
            success = True
        else:
            # This branch is also taken for deleted records, which thus
            # report a data dict whose counts are all ``None``.
            data = {
                'pid_value': pid.pid_value,
                'db_citations_count': db_cits,
                'es_citations_count': es_cits,
                'es_citations_field': es_citation_count_field
            }
        return (success, deleted, no_cits, data)
Exemplo n.º 6
0
def _process_record(pid, app):
    """Check that the citation counts known for one ``lit`` record agree.

    For the record identified by ``pid.pid_value`` three values are
    compared: the ``total`` of ``LiteratureSearch.citations(rec)``, the
    ``citation_count`` field returned by
    ``LiteratureSearch().get_record(rec.id)`` and
    ``rec.get_citations_count()``.

    Args:
        pid: object whose ``pid_value`` identifies the record to check.
        app: application object providing ``app_context()``.

    Returns:
        ``None`` when the module-level ``stop`` flag is truthy. Otherwise a
        tuple ``(success, deleted, no_cits, data)``:

        * ``success`` -- ``True`` when the record is deleted or when all
          three counts are equal.
        * ``deleted`` -- ``True`` when the record is flagged as deleted.
        * ``no_cits`` -- ``True`` when the counts agree and are all ``0``.
        * ``data`` -- ``{}`` when the counts agree, otherwise a dict with
          the pid value and the three collected counts (``None`` for any
          count that was not collected).
    """
    if stop:
        return
    with app.app_context():
        success = False
        deleted = False
        no_cits = False
        db_cits = None
        es_cits = None
        es_citation_count_field = None
        data = {}
        rec = get_db_record('lit', pid.pid_value)
        if rec.get('deleted'):
            success = True
            deleted = True
        if not deleted:
            try:
                es_cits = LiteratureSearch.citations(rec).total
                search = LiteratureSearch().source(includes=['citation_count'])
                results = search.get_record(rec.id).execute()
                if not results.hits:
                    es_citation_count_field = None
                else:
                    es_citation_count_field = results.hits[0]['citation_count']
                db_cits = rec.get_citations_count()
            except Exception as err:
                # ``click.echo`` does not interpolate %-style arguments: its
                # extra positional parameters are the output file and the
                # newline flag. The message is therefore formatted here.
                click.echo("Cannot prepare data for %s record. %s"
                           % (pid.pid_value, err))
        if not deleted and es_cits is not None and es_cits == db_cits == es_citation_count_field:
            if es_cits == 0:
                no_cits = True
            success = True
        else:
            # This branch is also taken for deleted records, which thus
            # report a data dict whose counts are all ``None``.
            data = {'pid_value': pid.pid_value,
                    'db_citations_count': db_cits,
                    'es_citations_count': es_cits,
                    'es_citations_field': es_citation_count_field}
        return (success, deleted, no_cits, data)
Exemplo n.º 7
0
def test_select_source_function_in_literature_search():
    """Check the ``_source`` includes that ``select_source`` puts on a search.

    The search dict produced after applying ``select_source`` to a
    ``LiteratureSearch`` must list exactly the fields below, in this order.
    """
    expected_fields = [
        '$schema',
        'abstracts.value',
        'arxiv_eprints.value',
        'arxiv_eprints.categories',
        'authors.affiliations',
        'authors.full_name',
        'authors.control_number',
        'collaborations',
        'control_number',
        'citation_count',
        'dois.value',
        'earliest_date',
        'inspire_categories',
        'number_of_references',
        'publication_info',
        'report_numbers',
        'titles.title',
    ]

    selected_source = select_source(LiteratureSearch()).to_dict()['_source']

    assert selected_source == {'includes': expected_fields}
Exemplo n.º 8
0
def get_signatures_matching_a_phonetic_encoding(phonetic_encoding):
    """Get all signatures matching a phonetic encoding from ES.

    Literature records are searched with a nested ``term`` query on
    ``authors.signature_block.raw``; at most 9999 hits are requested and
    only ``SIGNATURE_FIELDS`` are fetched. Within each hit, only the
    authors whose ``signature_block`` equals the given encoding are
    yielded.

    Args:
        phonetic_encoding(str): a phonetic encoding.

    Yields:
        dict: a signature matching the phonetic encoding.

    """
    query = Q('term', authors__signature_block__raw=phonetic_encoding)
    search_by_phonetic_encoding = LiteratureSearch().query('nested', path='authors', query=query)\
                                                    .params(_source=SIGNATURE_FIELDS, size=9999)

    for record in search_by_phonetic_encoding:
        # A hit is returned as soon as one of its authors matches, so the
        # other authors of the same record may have no ``signature_block``
        # at all. Plain attribute access would raise ``AttributeError`` on
        # those; such authors are skipped instead.
        for signature in getattr(record, 'authors', []):
            if getattr(signature, 'signature_block', None) == phonetic_encoding:
                yield signature.to_dict()
Exemplo n.º 9
0
def get_signatures_matching_a_phonetic_encoding(phonetic_encoding):
    """Get all signatures matching a phonetic encoding from ES.

    Literature records are searched with a nested ``term`` query on
    ``authors.signature_block.raw``; at most 9999 hits are requested and
    only ``SIGNATURE_FIELDS`` are fetched. For each author of a hit whose
    ``signature_block`` equals the given encoding, the value built by
    ``_build_signature`` from that author and the record's
    ``control_number`` is yielded.

    Args:
        phonetic_encoding(str): a phonetic encoding.

    Yields:
        dict: a signature matching the phonetic encoding.

    """
    block_query = Q('term', authors__signature_block__raw=phonetic_encoding)
    search = LiteratureSearch().query('nested', path='authors', query=block_query)
    search = search.params(_source=SIGNATURE_FIELDS, size=9999)

    for hit in search:
        document = hit.to_dict()
        publication_id = document['control_number']
        # Only some of the authors of a hit carry the requested encoding.
        matching_authors = (
            author for author in document.get('authors', [])
            if author.get('signature_block') == phonetic_encoding
        )
        for author in matching_authors:
            yield _build_signature(author, publication_id)